library('rgl') # to draw 3D plot
library('modeest') # to estimate MAP
library('coda') # to get HPDI (HDI)
library('grid') # to display figures on grid
library('dplyr')
library('tidyr')
library('rstan')
この問題の統計モデルは、次の通りである。 \[y_i \sim \mbox{Bernoulli}(\theta),\] \[\theta \sim \mbox{Beta}(a, b),\] \[a = \omega (\kappa - 2) + 1,\] \[b = (1 - \omega) (\kappa - 2) + 1,\] \[ \omega \sim \mbox{Beta}(A_\omega, B_\omega),\] \[\kappa = K.\]
ベータ分布の2つの形状母数を最頻値 \(\omega\) と集中度(事前確率の重み) \(\kappa\) から求める。
beta_shapes <- function(mode, kappa) {
# Convert a beta distribution's mode and concentration (kappa) into its
# two shape parameters, following the omega/kappa parameterization:
#   shape1 = mode * (kappa - 2) + 1,  shape2 = kappa - shape1.
# Args:
#   mode: mode of the beta; values outside [0, 1] are mapped to NaN
#         (note: 0 and 1 are accepted, which the grid below relies on)
#   kappa: concentration (weight of the prior); values <= 2 give NaN
# Both arguments are vectorized via ifelse().
# Return: list with elements shape1 and shape2
mode <- ifelse(mode >=0 & mode <=1, mode, NaN)
kappa <- ifelse(kappa > 2, kappa, NaN)
shape1 <- mode * (kappa - 2) + 1
shape2 <- kappa - shape1
return(list(shape1 = shape1, shape2 = shape2))
}まず、グリッドを作る。事前分布であるベータ分布の集中度(事前分布に与える重み)は定数\(K\)である(推定する母数ではない)と仮定し、さらにその値を\(K = 100\) とする。また、\(\omega\)の超事前分布は Beta(\(A_\omega = 2, B_\omega = 2\)) であるとする。グリッドの細かさは grid_gap で設定する(値が小さいほど正確な近似)。 さらに、グリッド上の事前確率を求める。 事前確率は、 \[p(\theta, \omega) = p(\theta | \omega)p(\omega)\] である。
# Case 1 constants; K is fixed (a constant, not an estimated parameter)
K <- 100 # concentration of the beta prior on theta
A_omega <- B_omega <- 2 # hyperprior omega ~ Beta(A_omega, B_omega)
grid_gap <- .001 # grid resolution; smaller = more accurate approximation
# Build the (theta, omega) grid and evaluate the joint prior
# p(theta, omega) = p(theta | omega) p(omega) at every grid point.
df_grid1 <- expand.grid(theta = seq(0, 1, by = grid_gap),
                        omega = seq(0, 1, by = grid_gap)) %>%
  mutate(
    p_theta = dbeta(theta, # p(theta | omega)
                    shape1 = beta_shapes(omega, K)$shape1,
                    shape2 = beta_shapes(omega, K)$shape2),
    p_omega = dbeta(omega, shape1 = A_omega, shape2 = B_omega), # p(omega)
    prior = p_theta * p_omega # joint prior (unnormalized on the grid)
  )
glimpse(df_grid1)## Observations: 1,002,001
## Variables: 5
## $ theta (dbl) 0.000, 0.001, 0.002, 0.003, 0.004, 0.005, 0.006, 0.007...
## $ omega (dbl) 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ p_theta (dbl) 99.000000, 89.753840, 81.363241, 73.749777, 66.842142,...
## $ p_omega (dbl) 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ prior (dbl) 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
グリッド上の事前確率を図示する。
# Interactive 3-D scatter of the joint prior (not rendered in HTML)
plot3d(df_grid1[c('theta', 'omega', 'prior')], col = 'royalblue')
# Contour plot of the joint prior p(theta, omega)
cont_prior <- ggplot(df_grid1, aes(x = theta, y = omega, z = prior)) +
  labs(x = expression(theta), y = expression(omega)) +
  geom_contour(color = 'royalblue')
# Marginals
# Marginal prior of omega, p(omega): sum the joint prior over theta, then
# rescale so the grid approximation integrates to 1.
m_omega <- df_grid1 %>%
  group_by(omega) %>%
  summarize(m_p_omega = sum(prior)) %>%
  mutate(m_p_omega = m_p_omega / sum(grid_gap * m_p_omega)) %>%
  ggplot(aes(omega, m_p_omega)) +
  geom_line(color = 'royalblue') +
  labs(x = expression(omega), y = expression(paste('marginal ', p(omega)))) + # fixed label typo 'margnial'
  coord_flip()
# Marginal prior of theta, p(theta): sum the joint prior over omega and
# normalize to a proper density on the grid.
m_theta <- df_grid1 %>%
  group_by(theta) %>%
  summarize(m_p_theta = sum(prior)) %>%
  mutate(m_p_theta = m_p_theta / sum(grid_gap * m_p_theta)) %>%
  ggplot(aes(theta, m_p_theta)) +
  geom_line(color = 'royalblue') +
  labs(x = expression(theta), y = expression(paste('marginal ', p(theta))))
# Conditional prior p(theta | omega = .75): slice the grid at omega = .75.
# NOTE(review): `==` on grid values relies on seq() producing exactly .75 —
# verify, or compare with a tolerance, if the grid step changes.
m_theta_c1 <- df_grid1 %>%
  filter(omega == .75) %>%
  group_by(theta) %>%
  summarize(m_p_theta = sum(prior)) %>%
  mutate(m_p_theta = m_p_theta / sum(grid_gap * m_p_theta)) %>%
  ggplot(aes(theta, m_p_theta)) +
  geom_line(color = 'royalblue') +
  labs(x = expression(theta),
       y = expression(paste('p(', theta, '|', omega, '= .75)')))
# Conditional prior p(theta | omega = .25)
m_theta_c2 <- df_grid1 %>%
  filter(omega == .25) %>%
  group_by(theta) %>%
  summarize(m_p_theta = sum(prior)) %>%
  mutate(m_p_theta = m_p_theta / sum(grid_gap * m_p_theta)) %>%
  ggplot(aes(theta, m_p_theta)) +
  geom_line(color = 'royalblue') +
  labs(x = expression(theta),
       y = expression(paste('p(', theta, '|', omega, '= .25)')))
# Arrange the prior plots on a single 4 x 4 viewport grid
grid.newpage() # start from a blank device
pushViewport(viewport(layout = grid.layout(4, 4)))
print(cont_prior, vp = viewport(layout.pos.row = 1:2, layout.pos.col = 1:2))
print(m_omega, vp = viewport(layout.pos.row = 1:2, layout.pos.col = 3:4))
print(m_theta, vp = viewport(layout.pos.row = 3:4, layout.pos.col = 1:2))
print(m_theta_c1, vp = viewport(layout.pos.row = 3, layout.pos.col = 3:4))
print(m_theta_c2, vp = viewport(layout.pos.row = 4, layout.pos.col = 3:4))データは D = {9 heads, 3 tails}。このとき尤度は、 \[L(\theta|D, \omega) = p(D | \theta, \omega) = p(D|\theta) = \prod_{i=1}^{12} \theta^{y_i} (1-\theta)^{1-y_i} =\theta^9 (1 - \theta)^3.\] この尤度関数を図示する。
# Bernoulli likelihood for D = {9 heads, 3 tails}: theta^9 (1 - theta)^3,
# constant in omega.
df_grid1 <- df_grid1 %>%
  mutate(likelihood = theta^9 * (1 - theta)^3)
# Interactive 3-D plot (not rendered in HTML)
plot3d(df_grid1[c('theta', 'omega', 'likelihood')], col = 'red')
# Contour of the likelihood surface
cont_likelihood <- ggplot(df_grid1, aes(x = theta, y = omega, z = likelihood)) +
  labs(x = expression(theta), y = expression(omega)) +
  geom_contour(color = 'red')
print(cont_likelihood)事前確率と尤度から事後確率を求めて事後分布を図示する。 求める事後確率は、 \[p(\theta, \omega | D) \propto p(D | \theta, \omega) p(\theta, \omega) = p(D | \theta) p(\theta|\omega) p(\omega)\] である。最後に、確率を正規化(確率の和が1になるように調整)する。
# Posterior on the grid: proportional to likelihood * prior; divide by the
# cell-area-weighted total so the 2-D density integrates to 1.
df_grid1 <- df_grid1 %>%
  mutate(posterior = likelihood * prior,
         posterior = posterior / sum(grid_gap^2 * posterior))
# Interactive 3-D plot (not rendered in HTML)
plot3d(df_grid1[c('theta', 'omega', 'posterior')], col = 'darkgreen')
# Contour of the joint posterior
cont_post <- ggplot(df_grid1, aes(x = theta, y = omega, z = posterior)) +
  labs(x = expression(theta), y = expression(omega)) +
  xlim(0, 1) + ylim(0, 1) +
  geom_contour(color = 'darkgreen')
# Marginals
# Marginal posterior of omega, p(omega | D): sum the joint posterior over
# theta and normalize on the grid.
post_omega <- df_grid1 %>%
  group_by(omega) %>%
  summarize(post_omega = sum(posterior)) %>%
  mutate(post_omega = post_omega / sum(grid_gap * post_omega)) %>%
  ggplot(aes(omega, post_omega)) +
  geom_line(color = 'darkgreen') +
  labs(x = expression(omega),
       y = expression(paste('marginal ', 'p(', omega, '|D)'))) + # fixed label typo 'margnial'
  coord_flip()
# Marginal posterior of theta, p(theta | D)
post_theta <- df_grid1 %>%
  group_by(theta) %>%
  summarize(post_theta = sum(posterior)) %>%
  mutate(post_theta = post_theta / sum(grid_gap * post_theta)) %>%
  ggplot(aes(theta, post_theta)) +
  geom_line(color = 'darkgreen') +
  labs(x = expression(theta),
       y = expression(paste('marginal ', 'p(', theta, '|D)')))
# Conditional posterior p(theta | omega = .75, D): slice at omega = .75
post_theta_c1 <- df_grid1 %>%
  filter(omega == .75) %>%
  group_by(theta) %>%
  summarize(post_theta = sum(posterior)) %>%
  mutate(post_theta = post_theta / sum(grid_gap * post_theta)) %>%
  ggplot(aes(theta, post_theta)) +
  geom_line(color = 'darkgreen') +
  labs(x = expression(theta),
       y = expression(paste('p(', theta, '|', omega, '= .75, D)')))
# Conditional posterior p(theta | omega = .25, D)
post_theta_c2 <- df_grid1 %>%
  filter(omega == .25) %>%
  group_by(theta) %>%
  summarize(post_theta = sum(posterior)) %>%
  mutate(post_theta = post_theta / sum(grid_gap * post_theta)) %>%
  ggplot(aes(theta, post_theta)) +
  geom_line(color = 'darkgreen') +
  labs(x = expression(theta),
       y = expression(paste('p(', theta, '|', omega, '= .25, D)')))
# Arrange the posterior plots on a single 4 x 4 viewport grid
grid.newpage() # start from a blank device
pushViewport(viewport(layout = grid.layout(4, 4)))
print(cont_post, vp = viewport(layout.pos.row = 1:2, layout.pos.col = 1:2))
print(post_omega, vp = viewport(layout.pos.row = 1:2, layout.pos.col = 3:4))
print(post_theta, vp = viewport(layout.pos.row = 3:4, layout.pos.col = 1:2))
print(post_theta_c1, vp = viewport(layout.pos.row = 3, layout.pos.col = 3:4))
print(post_theta_c2, vp = viewport(layout.pos.row = 4, layout.pos.col = 3:4))この事後分布を使って、コインのバイアス\(\theta\)について考察する。 そのために、\(\theta\)の周辺事後分布\(p(\theta|D)\)から10,000個の値を無作為に抽出し、そのサンプルを利用する。
# Draw 10,000 values from the grid marginal p(theta | D); the normalized
# marginal lives in the data slot of the ggplot object built above.
grid1_post <- post_theta$data
post1 <- sample(grid1_post$theta, size = 10000,
prob = grid1_post$post_theta, replace = TRUE)
# Histogram of the sampled marginal posterior of theta
hist(post1, col = "gray", freq = FALSE,
xlab = expression(paste('p(', theta, '|D)')), main = '')推定値を求める。
# posterior mean
mean(post1)## [1] 0.6878056
# MAP
mfv(post1)## [1] 0.702
# 90% posterior interval
quantile(post1, c(.05, .95))## 5% 95%
## 0.491 0.860
# 90% HPDI
HPDinterval(as.mcmc(post1), prob = .9)## lower upper
## var1 0.507 0.871
## attr(,"Probability")
## [1] 0.9
\(A_\omega = B_\omega = 20\), \(K=6\) について、Case 1 と同様にグリッド近似を行う。依拠する統計モデルはCase 1 とまったく同じである。
まず、グリッドを作ってグリッド上の事前確率を計算する。
# Case 2: much weaker concentration (K = 6) but a stronger hyperprior on
# omega (Beta(20, 20)); the statistical model itself is unchanged.
K <- 6 # concentration of Beta prior
A_omega <- B_omega <- 20
grid_gap <- .001
# Same grid construction as Case 1: joint prior p(theta | omega) p(omega)
df_grid2 <- expand.grid(theta = seq(0, 1, by = grid_gap),
omega = seq(0, 1, by = grid_gap)) %>%
mutate(p_theta = dbeta(theta, # p(theta | omega)
shape1 = beta_shapes(omega, K)$shape1,
shape2 = beta_shapes(omega, K)$shape2),
p_omega = dbeta(omega, shape1 = A_omega, shape2 = B_omega), # p(omega)
prior = p_theta * p_omega)事前確率を図示する。
# 3-D plot: The result doesn't appear on HTML
plot3d(df_grid2[c('theta', 'omega', 'prior')], col = 'royalblue')
# Reuse the Case-1 contour plot with the Case-2 data (%+% replaces the data)
cont_prior <- cont_prior %+% df_grid2 + xlim(0, 1) + ylim(0, 1)
# Marginals
# Marginal prior of omega, p(omega), normalized on the grid
m_omega <- df_grid2 %>%
  group_by(omega) %>%
  summarize(m_p_omega = sum(prior)) %>%
  mutate(m_p_omega = m_p_omega / sum(grid_gap * m_p_omega)) %>%
  ggplot(aes(omega, m_p_omega)) +
  geom_line(color = 'royalblue') +
  labs(x = expression(omega), y = expression(paste('marginal ', p(omega)))) + # fixed label typo 'margnial'
  coord_flip()
# Marginal prior of theta, p(theta), for Case 2
m_theta <- df_grid2 %>%
  group_by(theta) %>%
  summarize(m_p_theta = sum(prior)) %>%
  mutate(m_p_theta = m_p_theta / sum(grid_gap * m_p_theta)) %>%
  ggplot(aes(theta, m_p_theta)) +
  geom_line(color = 'royalblue') +
  labs(x = expression(theta), y = expression(paste('marginal ', p(theta))))
# Conditional prior p(theta | omega = .75)
m_theta_c1 <- df_grid2 %>%
  filter(omega == .75) %>%
  group_by(theta) %>%
  summarize(m_p_theta = sum(prior)) %>%
  mutate(m_p_theta = m_p_theta / sum(grid_gap * m_p_theta)) %>%
  ggplot(aes(theta, m_p_theta)) +
  geom_line(color = 'royalblue') +
  labs(x = expression(theta),
       y = expression(paste('p(', theta, '|', omega, '= .75)')))
# Conditional prior p(theta | omega = .25)
m_theta_c2 <- df_grid2 %>%
  filter(omega == .25) %>%
  group_by(theta) %>%
  summarize(m_p_theta = sum(prior)) %>%
  mutate(m_p_theta = m_p_theta / sum(grid_gap * m_p_theta)) %>%
  ggplot(aes(theta, m_p_theta)) +
  geom_line(color = 'royalblue') +
  labs(x = expression(theta),
       y = expression(paste('p(', theta, '|', omega, '= .25)')))
# Arrange the Case-2 prior plots on a single 4 x 4 viewport grid
grid.newpage() # start from a blank device
pushViewport(viewport(layout = grid.layout(4, 4)))
print(cont_prior, vp = viewport(layout.pos.row = 1:2, layout.pos.col = 1:2))
print(m_omega, vp = viewport(layout.pos.row = 1:2, layout.pos.col = 3:4))
print(m_theta, vp = viewport(layout.pos.row = 3:4, layout.pos.col = 1:2))
print(m_theta_c1, vp = viewport(layout.pos.row = 3, layout.pos.col = 3:4))
print(m_theta_c2, vp = viewport(layout.pos.row = 4, layout.pos.col = 3:4))尤度を計算(Case 1 とまったく同じ)して図示する。
# Likelihood for Case 2 (same data D = {9 heads, 3 tails} as Case 1)
df_grid2 <- df_grid2 %>%
  mutate(likelihood = theta^9 * (1 - theta)^3)
# 3D plot: the result doesn't appear on HTML
# BUG FIX: plot the Case-2 grid (df_grid2); the original plotted df_grid1.
plot3d(df_grid2[c('theta', 'omega', 'likelihood')], col = 'red')
# Contour of the likelihood (reuse the Case-1 plot with Case-2 data)
cont_likelihood <- cont_likelihood %+% df_grid2
print(cont_likelihood)事後確率を求めて図示する。
# Case-2 posterior on the grid, normalized over the 2-D grid cells
df_grid2 <- df_grid2 %>%
  mutate(posterior = likelihood * prior, # posterior is proportional to likelihood times prior
         posterior = posterior / sum(grid_gap^2 * posterior)) # normalize
# 3D plot: not shown in HTML
plot3d(df_grid2[c('theta', 'omega', 'posterior')], col = 'darkgreen')
# Contour of posterior (reuse the Case-1 plot, swapping in Case-2 data)
cont_post <- cont_post %+% df_grid2
# Marginals
# Marginal posterior of omega, p(omega | D), normalized on the grid
post_omega <- df_grid2 %>%
  group_by(omega) %>%
  summarize(post_omega = sum(posterior)) %>%
  mutate(post_omega = post_omega / sum(grid_gap * post_omega)) %>%
  ggplot(aes(omega, post_omega)) +
  geom_line(color = 'darkgreen') +
  labs(x = expression(omega),
       y = expression(paste('marginal ', 'p(', omega, '|D)'))) + # fixed label typo 'margnial'
  coord_flip()
# Marginal posterior of theta, p(theta | D), for Case 2
post_theta <- df_grid2 %>%
  group_by(theta) %>%
  summarize(post_theta = sum(posterior)) %>%
  mutate(post_theta = post_theta / sum(grid_gap * post_theta)) %>%
  ggplot(aes(theta, post_theta)) +
  geom_line(color = 'darkgreen') +
  labs(x = expression(theta),
       y = expression(paste('marginal ', 'p(', theta, '|D)')))
# Conditional posterior p(theta | omega = .75, D)
post_theta_c1 <- df_grid2 %>%
  filter(omega == .75) %>%
  group_by(theta) %>%
  summarize(post_theta = sum(posterior)) %>%
  mutate(post_theta = post_theta / sum(grid_gap * post_theta)) %>%
  ggplot(aes(theta, post_theta)) +
  geom_line(color = 'darkgreen') +
  labs(x = expression(theta),
       y = expression(paste('p(', theta, '|', omega, '= .75, D)')))
# Conditional posterior p(theta | omega = .25, D)
post_theta_c2 <- df_grid2 %>%
  filter(omega == .25) %>%
  group_by(theta) %>%
  summarize(post_theta = sum(posterior)) %>%
  mutate(post_theta = post_theta / sum(grid_gap * post_theta)) %>%
  ggplot(aes(theta, post_theta)) +
  geom_line(color = 'darkgreen') +
  labs(x = expression(theta),
       y = expression(paste('p(', theta, '|', omega, '= .25, D)')))
# Arrange the Case-2 posterior plots on a single 4 x 4 viewport grid
grid.newpage() # start from a blank device
pushViewport(viewport(layout = grid.layout(4, 4)))
print(cont_post, vp = viewport(layout.pos.row = 1:2, layout.pos.col = 1:2))
print(post_omega, vp = viewport(layout.pos.row = 1:2, layout.pos.col = 3:4))
print(post_theta, vp = viewport(layout.pos.row = 3:4, layout.pos.col = 1:2))
print(post_theta_c1, vp = viewport(layout.pos.row = 3, layout.pos.col = 3:4))
print(post_theta_c2, vp = viewport(layout.pos.row = 4, layout.pos.col = 3:4))先ほどと同様に、\(\theta\)の周辺事後分布から10,000個の値を無作為抽出し、\(\theta\)について考察する。
# Draw 10,000 values from the Case-2 grid marginal p(theta | D) stored in
# the ggplot object's data slot, then plot the sample.
grid2_post <- post_theta$data
post2 <- sample(grid2_post$theta, size = 10000,
prob = grid2_post$post_theta, replace = TRUE)
hist(post2, col = "gray", freq = FALSE,
xlab = expression(paste('p(', theta, '|D)')), main = '')# posterior mean
mean(post2)## [1] 0.6698635
# MAP
mfv(post2)## [1] 0.725
# 90% posterior interval
quantile(post2, c(.05, .95))## 5% 95%
## 0.480 0.839
# 90% HPDI
HPDinterval(as.mcmc(post2), prob = .9)## lower upper
## var1 0.49 0.845
## attr(,"Probability")
## [1] 0.9
以上の推定をStan を使って行ってみよう。
まず、Stan モデルを書く。Case 1 とCase 2が同じモデルで分析できるよう、\(K\)と\(A_\omega\), \(B_\omega\) をデータ K, A, B として渡すことにする。
# Stan model for the hierarchical coin model. K, A (= A_omega) and B
# (= B_omega) are passed as data so the same model serves Case 1 and Case 2.
# NOTE(review): the '<-' assignment in the transformed parameters block is
# deprecated in modern Stan (use '=', Stan >= 2.10) — confirm against the
# installed rstan version before changing the model string.
model_1 <- "
data {
int<lower=1> N;
int<lower=0, upper=1> y[N];
real<lower=2> K;
real<lower=0> A;
real<lower=0> B;
}
parameters {
real<lower=0, upper=1> theta;
real<lower=0, upper=1> omega;
}
transformed parameters {
real<lower=0> a;
real<lower=0> b;
a <- omega * (K - 2) + 1;
b <- (1 - omega) * (K - 2) + 1;
}
model {
for (i in 1:N)
y[i] ~ bernoulli(theta);
theta ~ beta(a, b);
omega ~ beta(A, B);
}
"
# Write the model to disk so stan() can compile it from file
dir.create('kruschke-ch09', showWarnings = FALSE)
write(model_1, 'kruschke-ch09/model-1.stan')RStanを使ってStanで推定を行う。
# Case 1 via Stan: D = {9 heads, 3 tails}, K = 100, omega ~ Beta(2, 2)
y <- rep(c(1, 0), c(9, 3)) # outcome
myd <- list(y = y, N = length(y), K = 100, A = 2, B = 2)
# 4 chains, 1000 warmup + 3000 sampling iterations each
stanfit1 <- stan(file = 'kruschke-ch09/model-1.stan', data = myd,
warmup = 1000, iter = 4000, chains = 4)##
## SAMPLING FOR MODEL 'model-1' NOW (CHAIN 1).
##
## Chain 1, Iteration: 1 / 4000 [ 0%] (Warmup)
## Chain 1, Iteration: 400 / 4000 [ 10%] (Warmup)
## Chain 1, Iteration: 800 / 4000 [ 20%] (Warmup)
## Chain 1, Iteration: 1001 / 4000 [ 25%] (Sampling)
## Chain 1, Iteration: 1400 / 4000 [ 35%] (Sampling)
## Chain 1, Iteration: 1800 / 4000 [ 45%] (Sampling)
## Chain 1, Iteration: 2200 / 4000 [ 55%] (Sampling)
## Chain 1, Iteration: 2600 / 4000 [ 65%] (Sampling)
## Chain 1, Iteration: 3000 / 4000 [ 75%] (Sampling)
## Chain 1, Iteration: 3400 / 4000 [ 85%] (Sampling)
## Chain 1, Iteration: 3800 / 4000 [ 95%] (Sampling)
## Chain 1, Iteration: 4000 / 4000 [100%] (Sampling)#
## # Elapsed Time: 0.030374 seconds (Warm-up)
## # 0.072286 seconds (Sampling)
## # 0.10266 seconds (Total)
## #
##
## SAMPLING FOR MODEL 'model-1' NOW (CHAIN 2).
##
## Chain 2, Iteration: 1 / 4000 [ 0%] (Warmup)
## Chain 2, Iteration: 400 / 4000 [ 10%] (Warmup)
## Chain 2, Iteration: 800 / 4000 [ 20%] (Warmup)
## Chain 2, Iteration: 1001 / 4000 [ 25%] (Sampling)
## Chain 2, Iteration: 1400 / 4000 [ 35%] (Sampling)
## Chain 2, Iteration: 1800 / 4000 [ 45%] (Sampling)
## Chain 2, Iteration: 2200 / 4000 [ 55%] (Sampling)
## Chain 2, Iteration: 2600 / 4000 [ 65%] (Sampling)
## Chain 2, Iteration: 3000 / 4000 [ 75%] (Sampling)
## Chain 2, Iteration: 3400 / 4000 [ 85%] (Sampling)
## Chain 2, Iteration: 3800 / 4000 [ 95%] (Sampling)
## Chain 2, Iteration: 4000 / 4000 [100%] (Sampling)#
## # Elapsed Time: 0.032942 seconds (Warm-up)
## # 0.082539 seconds (Sampling)
## # 0.115481 seconds (Total)
## #
##
## SAMPLING FOR MODEL 'model-1' NOW (CHAIN 3).
##
## Chain 3, Iteration: 1 / 4000 [ 0%] (Warmup)
## Chain 3, Iteration: 400 / 4000 [ 10%] (Warmup)
## Chain 3, Iteration: 800 / 4000 [ 20%] (Warmup)
## Chain 3, Iteration: 1001 / 4000 [ 25%] (Sampling)
## Chain 3, Iteration: 1400 / 4000 [ 35%] (Sampling)
## Chain 3, Iteration: 1800 / 4000 [ 45%] (Sampling)
## Chain 3, Iteration: 2200 / 4000 [ 55%] (Sampling)
## Chain 3, Iteration: 2600 / 4000 [ 65%] (Sampling)
## Chain 3, Iteration: 3000 / 4000 [ 75%] (Sampling)
## Chain 3, Iteration: 3400 / 4000 [ 85%] (Sampling)
## Chain 3, Iteration: 3800 / 4000 [ 95%] (Sampling)
## Chain 3, Iteration: 4000 / 4000 [100%] (Sampling)#
## # Elapsed Time: 0.026231 seconds (Warm-up)
## # 0.073299 seconds (Sampling)
## # 0.09953 seconds (Total)
## #
##
## SAMPLING FOR MODEL 'model-1' NOW (CHAIN 4).
##
## Chain 4, Iteration: 1 / 4000 [ 0%] (Warmup)
## Chain 4, Iteration: 400 / 4000 [ 10%] (Warmup)
## Chain 4, Iteration: 800 / 4000 [ 20%] (Warmup)
## Chain 4, Iteration: 1001 / 4000 [ 25%] (Sampling)
## Chain 4, Iteration: 1400 / 4000 [ 35%] (Sampling)
## Chain 4, Iteration: 1800 / 4000 [ 45%] (Sampling)
## Chain 4, Iteration: 2200 / 4000 [ 55%] (Sampling)
## Chain 4, Iteration: 2600 / 4000 [ 65%] (Sampling)
## Chain 4, Iteration: 3000 / 4000 [ 75%] (Sampling)
## Chain 4, Iteration: 3400 / 4000 [ 85%] (Sampling)
## Chain 4, Iteration: 3800 / 4000 [ 95%] (Sampling)
## Chain 4, Iteration: 4000 / 4000 [100%] (Sampling)#
## # Elapsed Time: 0.025724 seconds (Warm-up)
## # 0.082449 seconds (Sampling)
## # 0.108173 seconds (Total)
## #
推定結果とトレースプロットを表示する。
print(stanfit1, pars = c('theta', 'omega'), probs = c(.05, .5, .95))## Inference for Stan model: model-1.
## 4 chains, each with iter=4000; warmup=1000; thin=1;
## post-warmup draws per chain=3000, total post-warmup draws=12000.
##
## mean se_mean sd 5% 50% 95% n_eff Rhat
## theta 0.68 0 0.11 0.48 0.69 0.85 1882 1
## omega 0.68 0 0.12 0.47 0.69 0.86 1872 1
##
## Samples were drawn using NUTS(diag_e) at Thu Jun 23 22:27:06 2016.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
plot(stanfit1, pars = c('theta', 'omega'))## ci_level: 0.8 (80% intervals)
## outer_level: 0.95 (95% intervals)
traceplot(stanfit1, pars = c('theta', 'omega'))\(\hat{R}\) は1であり、トレースプロットを見る限り、収束していないというはっきりした証拠は見つからない。
この結果を使い、\(\theta\)について考える。
post_stan1 <- extract(stanfit1, pars = c('theta', 'omega'))
hist(post_stan1$theta, col = "gray", freq = FALSE,
xlab = expression(paste('p(', theta, '|D)')), main = '')# MAP (estimate)
mlv(post_stan1$theta, method = 'naive', bw = 1/5)## Mode (most likely value): 0.6723574
## Bickel's modal skewness: 0.1235
## Call: mlv.default(x = post_stan1$theta, bw = 1/5, method = "naive")
# 90% HPDI
HPDinterval(as.mcmc(as.vector(post_stan1$theta)), prob = .9)## lower upper
## var1 0.5038838 0.8675141
## attr(,"Probability")
## [1] 0.9
グリッド近似の場合とほぼ同じ結果が得られる(はず)。
同様に、Case 2についてもStanで推定してみよう。データの K, A, B の値だけ変えて、以下のように実行する。
# Case 2 via Stan: same model file, only the data values K, A, B change
myd <- list(y = y, N = length(y), K = 6, A = 20, B = 20)
stanfit2 <- stan(file = 'kruschke-ch09/model-1.stan', data = myd,
warmup = 1000, iter = 4000, chains = 4)##
## SAMPLING FOR MODEL 'model-1' NOW (CHAIN 1).
##
## Chain 1, Iteration: 1 / 4000 [ 0%] (Warmup)
## Chain 1, Iteration: 400 / 4000 [ 10%] (Warmup)
## Chain 1, Iteration: 800 / 4000 [ 20%] (Warmup)
## Chain 1, Iteration: 1001 / 4000 [ 25%] (Sampling)
## Chain 1, Iteration: 1400 / 4000 [ 35%] (Sampling)
## Chain 1, Iteration: 1800 / 4000 [ 45%] (Sampling)
## Chain 1, Iteration: 2200 / 4000 [ 55%] (Sampling)
## Chain 1, Iteration: 2600 / 4000 [ 65%] (Sampling)
## Chain 1, Iteration: 3000 / 4000 [ 75%] (Sampling)
## Chain 1, Iteration: 3400 / 4000 [ 85%] (Sampling)
## Chain 1, Iteration: 3800 / 4000 [ 95%] (Sampling)
## Chain 1, Iteration: 4000 / 4000 [100%] (Sampling)#
## # Elapsed Time: 0.013632 seconds (Warm-up)
## # 0.038391 seconds (Sampling)
## # 0.052023 seconds (Total)
## #
##
## SAMPLING FOR MODEL 'model-1' NOW (CHAIN 2).
##
## Chain 2, Iteration: 1 / 4000 [ 0%] (Warmup)
## Chain 2, Iteration: 400 / 4000 [ 10%] (Warmup)
## Chain 2, Iteration: 800 / 4000 [ 20%] (Warmup)
## Chain 2, Iteration: 1001 / 4000 [ 25%] (Sampling)
## Chain 2, Iteration: 1400 / 4000 [ 35%] (Sampling)
## Chain 2, Iteration: 1800 / 4000 [ 45%] (Sampling)
## Chain 2, Iteration: 2200 / 4000 [ 55%] (Sampling)
## Chain 2, Iteration: 2600 / 4000 [ 65%] (Sampling)
## Chain 2, Iteration: 3000 / 4000 [ 75%] (Sampling)
## Chain 2, Iteration: 3400 / 4000 [ 85%] (Sampling)
## Chain 2, Iteration: 3800 / 4000 [ 95%] (Sampling)
## Chain 2, Iteration: 4000 / 4000 [100%] (Sampling)#
## # Elapsed Time: 0.0162 seconds (Warm-up)
## # 0.046058 seconds (Sampling)
## # 0.062258 seconds (Total)
## #
##
## SAMPLING FOR MODEL 'model-1' NOW (CHAIN 3).
##
## Chain 3, Iteration: 1 / 4000 [ 0%] (Warmup)
## Chain 3, Iteration: 400 / 4000 [ 10%] (Warmup)
## Chain 3, Iteration: 800 / 4000 [ 20%] (Warmup)
## Chain 3, Iteration: 1001 / 4000 [ 25%] (Sampling)
## Chain 3, Iteration: 1400 / 4000 [ 35%] (Sampling)
## Chain 3, Iteration: 1800 / 4000 [ 45%] (Sampling)
## Chain 3, Iteration: 2200 / 4000 [ 55%] (Sampling)
## Chain 3, Iteration: 2600 / 4000 [ 65%] (Sampling)
## Chain 3, Iteration: 3000 / 4000 [ 75%] (Sampling)
## Chain 3, Iteration: 3400 / 4000 [ 85%] (Sampling)
## Chain 3, Iteration: 3800 / 4000 [ 95%] (Sampling)
## Chain 3, Iteration: 4000 / 4000 [100%] (Sampling)#
## # Elapsed Time: 0.012744 seconds (Warm-up)
## # 0.035169 seconds (Sampling)
## # 0.047913 seconds (Total)
## #
##
## SAMPLING FOR MODEL 'model-1' NOW (CHAIN 4).
##
## Chain 4, Iteration: 1 / 4000 [ 0%] (Warmup)
## Chain 4, Iteration: 400 / 4000 [ 10%] (Warmup)
## Chain 4, Iteration: 800 / 4000 [ 20%] (Warmup)
## Chain 4, Iteration: 1001 / 4000 [ 25%] (Sampling)
## Chain 4, Iteration: 1400 / 4000 [ 35%] (Sampling)
## Chain 4, Iteration: 1800 / 4000 [ 45%] (Sampling)
## Chain 4, Iteration: 2200 / 4000 [ 55%] (Sampling)
## Chain 4, Iteration: 2600 / 4000 [ 65%] (Sampling)
## Chain 4, Iteration: 3000 / 4000 [ 75%] (Sampling)
## Chain 4, Iteration: 3400 / 4000 [ 85%] (Sampling)
## Chain 4, Iteration: 3800 / 4000 [ 95%] (Sampling)
## Chain 4, Iteration: 4000 / 4000 [100%] (Sampling)#
## # Elapsed Time: 0.013013 seconds (Warm-up)
## # 0.041974 seconds (Sampling)
## # 0.054987 seconds (Total)
## #
print(stanfit2, pars = c('theta', 'omega'), probs = c(.05, .5, .95))## Inference for Stan model: model-1.
## 4 chains, each with iter=4000; warmup=1000; thin=1;
## post-warmup draws per chain=3000, total post-warmup draws=12000.
##
## mean se_mean sd 5% 50% 95% n_eff Rhat
## theta 0.67 0 0.11 0.48 0.68 0.84 6908 1
## omega 0.52 0 0.08 0.39 0.52 0.64 7542 1
##
## Samples were drawn using NUTS(diag_e) at Thu Jun 23 22:27:08 2016.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
plot(stanfit2, pars = c('theta', 'omega'))traceplot(stanfit2, pars = c('theta', 'omega'))事後分布から\(\theta\)について考察する。
post_stan2 <- extract(stanfit2, pars = c('theta', 'omega'))
hist(post_stan2$theta, col = "gray", freq = FALSE,
xlab = expression(paste('p(', theta, '|D)')), main = '')# MAP (estimate)
mlv(post_stan2$theta, method = 'naive', bw = 1/5)## Mode (most likely value): 0.6604532
## Bickel's modal skewness: 0.1111667
## Call: mlv.default(x = post_stan2$theta, bw = 1/5, method = "naive")
# 90% HPDI
HPDinterval(as.mcmc(as.vector(post_stan2$theta)), prob = .9)## lower upper
## var1 0.5006783 0.8562445
## attr(,"Probability")
## [1] 0.9
やはり、グリッド近似とほぼ同じ結果が得られる。 この問題くらい単純な推定は、グリッド近似で十分実行可能である。
この問題の統計モデルは、次のように表現できる。 \[y_{i|s} \sim \mbox{Bernoulli}(\theta_s),\] \[\theta_s \sim \mbox{Beta}(a, b),\] \[a = \omega (\kappa - 2) + 1,\] \[b = (1 - \omega) (\kappa - 2) + 1,\] \[ \omega \sim \mbox{Beta}(A_\omega, B_\omega),\] \[\kappa = K.\]
コインが2枚の状況を考える。推定したいパラメタは、各コインのバイアス\(\theta_1\), \(\theta_2\) と、両方のバイアスに影響を与える鋳造所のバイアス\(\omega\)である。以下の2つのケースについて事後分布を求める。
まず、グリッドを作り、グリッド上の事前確率を求める。 事前確率は、 \[p(\theta_1, \theta_2, \omega) = p(\theta_1 | \omega) p(\theta_2 | \omega) p(\omega)\] である。
# Two-coin hierarchy: grid over (theta1, theta2, omega) with joint prior
# p(theta1, theta2, omega) = p(theta1 | omega) p(theta2 | omega) p(omega).
K <- 5 # concentration of the beta prior
A_omega <- B_omega <- 2 # shape parameters of the hyperprior
grid_gap <- .01 # coarser grid: three dimensions now
df_grid3 <- expand.grid(theta1 = seq(0, 1, by = grid_gap),
                        theta2 = seq(0, 1, by = grid_gap),
                        omega = seq(0, 1, by = grid_gap)) %>%
  mutate(
    p_theta1 = dbeta(theta1, # p(theta1 | omega)
                     shape1 = beta_shapes(omega, K)$shape1,
                     shape2 = beta_shapes(omega, K)$shape2),
    p_theta2 = dbeta(theta2, # p(theta2 | omega)
                     shape1 = beta_shapes(omega, K)$shape1,
                     shape2 = beta_shapes(omega, K)$shape2),
    p_omega = dbeta(omega, shape1 = A_omega, shape2 = B_omega), # p(omega)
    prior = p_theta1 * p_theta2 * p_omega # joint prior
  )
glimpse(df_grid3)## Observations: 1,030,301
## Variables: 7
## $ theta1 (dbl) 0.00, 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08,...
## $ theta2 (dbl) 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ omega (dbl) 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ p_theta1 (dbl) 4.000000, 3.881196, 3.764768, 3.650692, 3.538944, 3.4...
## $ p_theta2 (dbl) 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,...
## $ p_omega (dbl) 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ prior (dbl) 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
いくつかの周辺事前確率を図示する。
# Contour of the joint prior p(theta1, omega): marginalize out theta2
cont_prior1 <- df_grid3 %>%
  group_by(theta1, omega) %>%
  summarize(p_theta1_omega = sum(prior)) %>%
  ggplot(aes(x = theta1, y = omega, z = p_theta1_omega)) +
  labs(x = expression(theta[1]), y = expression(omega)) +
  geom_contour(color = 'royalblue')
# Contour of the joint prior p(theta2, omega): marginalize out theta1
cont_prior2 <- df_grid3 %>%
  group_by(theta2, omega) %>%
  summarize(p_theta2_omega = sum(prior)) %>%
  ggplot(aes(x = theta2, y = omega, z = p_theta2_omega)) +
  labs(x = expression(theta[2]), y = expression(omega)) +
  geom_contour(color = 'royalblue')
# Marginals
# Marginal prior of omega, p(omega): sum over both theta grids, normalize
m_omega <- df_grid3 %>%
  group_by(omega) %>%
  summarize(m_p_omega = sum(prior)) %>%
  mutate(m_p_omega = m_p_omega / sum(grid_gap * m_p_omega)) %>%
  ggplot(aes(omega, m_p_omega)) +
  geom_line(color = 'royalblue') +
  labs(x = expression(omega),
       y = expression(paste('marginal ', p(omega)))) + # fixed label typo 'margnial'
  coord_flip()
# Marginal prior of theta1, p(theta1)
m_theta1 <- df_grid3 %>%
  group_by(theta1) %>%
  summarize(m_p_theta1 = sum(prior)) %>%
  mutate(m_p_theta1 = m_p_theta1 / sum(grid_gap * m_p_theta1)) %>%
  ggplot(aes(theta1, m_p_theta1)) +
  geom_line(color = 'royalblue') +
  labs(x = expression(theta[1]),
       y = expression(paste('marginal ', p(theta[1]))))
# Marginal prior of theta2, p(theta2)
m_theta2 <- df_grid3 %>%
  group_by(theta2) %>%
  summarize(m_p_theta2 = sum(prior)) %>%
  mutate(m_p_theta2 = m_p_theta2 / sum(grid_gap * m_p_theta2)) %>%
  ggplot(aes(theta2, m_p_theta2)) +
  geom_line(color = 'royalblue') +
  labs(x = expression(theta[2]),
       y = expression(paste('marginal ', p(theta[2]))))
# Arrange the two-coin prior plots on a 2 x 3 viewport grid
grid.newpage() # start from a blank device
pushViewport(viewport(layout = grid.layout(2, 3)))
print(cont_prior1, vp = viewport(layout.pos.row = 1, layout.pos.col = 1))
print(cont_prior2, vp = viewport(layout.pos.row = 1, layout.pos.col = 2))
print(m_omega, vp = viewport(layout.pos.row = 1, layout.pos.col = 3))
print(m_theta1, vp = viewport(layout.pos.row = 2, layout.pos.col = 1))
print(m_theta2, vp = viewport(layout.pos.row = 2, layout.pos.col = 2))データは \(D_1\) = {3 heads, 12 tails}, \(D_2\) = {4 heads, 1 tail}。このとき尤度は、 \[L(\theta_1|D_1, \omega) = p(D_1 | \theta_1, \omega) = p(D_1|\theta_1) = \prod_{i=1}^{15} \theta^{y_i} (1-\theta)^{1-y_i} =\theta^3 (1 - \theta)^{12},\] \[L(\theta_2|D_2, \omega) = p(D_2 | \theta_2, \omega) = p(D_2|\theta_2) = \prod_{i=1}^{5} \theta^{y_i} (1-\theta)^{1-y_i} =\theta^4 (1 - \theta)^1.\] この尤度関数を図示する。
# Likelihoods: D1 = {3 heads, 12 tails} for coin 1, D2 = {4 heads, 1 tail}
# for coin 2; each depends only on its own theta.
df_grid3 <- df_grid3 %>%
  mutate(lik1 = theta1^3 * (1 - theta1)^12,
         lik2 = theta2^4 * (1 - theta2))
# 3D plots (interactive; not rendered in HTML)
plot3d(df_grid3[c('theta1', 'omega', 'lik1')], col = 'red')
plot3d(df_grid3[c('theta2', 'omega', 'lik2')], col = 'red')
# Contours of the likelihoods; lik1 is constant over theta2 (and lik2 over
# theta1), so mean() just collapses identical values.
cont_lik1 <- df_grid3 %>%
  group_by(theta1, omega) %>%
  summarize(lik1 = mean(lik1)) %>%
  ggplot(aes(x = theta1, y = omega, z = lik1)) +
  labs(x = expression(theta[1]), y = expression(omega)) +
  geom_contour(color = 'red')
cont_lik2 <- df_grid3 %>%
  group_by(theta2, omega) %>%
  summarize(lik2 = mean(lik2)) %>%
  ggplot(aes(x = theta2, y = omega, z = lik2)) +
  labs(x = expression(theta[2]), y = expression(omega)) +
  geom_contour(color = 'red')
# Show the two likelihood contours side by side
grid.newpage() # start from a blank device
pushViewport(viewport(layout = grid.layout(1, 3)))
print(cont_lik1, vp = viewport(layout.pos.row = 1, layout.pos.col = 1))
print(cont_lik2, vp = viewport(layout.pos.row = 1, layout.pos.col = 2))事前確率と尤度から事後確率を求め、事後分布を図示する。 求める事後確率は、 \[p(\theta_1, \theta_2, \omega | D) \propto p(D | \theta_1, \theta_2, \omega) p(\theta_1, \theta_2, \omega) = p(D_1 | \theta_1) p(D_2 | \theta_2) p(\theta_1|\omega) p(\theta_2 | \omega) p(\omega)\] である。最後に、確率を正規化(確率の和が1になるように調整)する。
# Posterior on the 3-D grid: p(theta1, theta2, omega | D) is proportional
# to lik1 * lik2 * prior; rescale so the grid probabilities sum to 1.
df_grid3 <- df_grid3 %>%
  mutate(post = lik1 * lik2 * prior,
         post = post / sum(post))
# Contour of the joint posterior p(theta1, omega | D)
cont_post1 <- df_grid3 %>%
  group_by(theta1, omega) %>%
  summarize(post = sum(post)) %>%
  ggplot(aes(x = theta1, y = omega, z = post)) +
  xlim(0, 1) + ylim(0, 1) +
  labs(x = expression(theta[1]), y = expression(omega)) +
  geom_contour(color = 'darkgreen')
# Contour of the joint posterior p(theta2, omega | D)
cont_post2 <- df_grid3 %>%
  group_by(theta2, omega) %>%
  summarize(post = sum(post)) %>%
  ggplot(aes(x = theta2, y = omega, z = post)) +
  xlim(0, 1) + ylim(0, 1) +
  labs(x = expression(theta[2]), y = expression(omega)) +
  geom_contour(color = 'darkgreen')
# Marginal posterior of omega, p(omega | D), normalized on the grid
post_omega <- df_grid3 %>%
  group_by(omega) %>%
  summarize(post = sum(post)) %>%
  mutate(post = post / sum(grid_gap * post)) %>%
  ggplot(aes(omega, post)) +
  geom_line(color = 'darkgreen') +
  labs(x = expression(omega),
       y = expression(paste('marginal ', 'p(', omega, '|D)'))) + # fixed label typo 'margnial'
  coord_flip()
# Marginal posterior of theta1, p(theta1 | D)
post_theta1 <- df_grid3 %>%
  group_by(theta1) %>%
  summarize(post = sum(post)) %>%
  mutate(post = post / sum(grid_gap * post)) %>%
  ggplot(aes(theta1, post)) +
  geom_line(color = 'darkgreen') +
  ylim(0, 4) + # shared y scale so the two coins are comparable
  labs(x = expression(theta[1]),
       y = expression(paste('marginal ', 'p(', theta[1], '|D)')))
# Marginal posterior of theta2, p(theta2 | D)
post_theta2 <- df_grid3 %>%
  group_by(theta2) %>%
  summarize(post = sum(post)) %>%
  mutate(post = post / sum(grid_gap * post)) %>%
  ggplot(aes(theta2, post)) +
  geom_line(color = 'darkgreen') +
  ylim(0, 4) + # shared y scale so the two coins are comparable
  labs(x = expression(theta[2]),
       y = expression(paste('marginal ', 'p(', theta[2], '|D)')))
# Arrange the two-coin posterior plots on a 2 x 3 viewport grid
grid.newpage() # start from a blank device
pushViewport(viewport(layout = grid.layout(2, 3)))
print(cont_post1, vp = viewport(layout.pos.row = 1, layout.pos.col = 1))
print(cont_post2, vp = viewport(layout.pos.row = 1, layout.pos.col = 2))
print(post_omega, vp = viewport(layout.pos.row = 1, layout.pos.col = 3))
print(post_theta1, vp = viewport(layout.pos.row = 2, layout.pos.col = 1))
print(post_theta2, vp = viewport(layout.pos.row = 2, layout.pos.col = 2))この事後分布を使って、コインのバイアス\(\theta_1\), \(\theta_2\) と鋳造所のバイアス \(\omega\) について考察する。 そのために、\(\theta_s\)の周辺事後分布\(p(\theta_s|D)\)から10,000個の値を無作為に抽出し、そのサンプルを利用する。
# Draw 10,000 values from each grid marginal (stored in the ggplot objects'
# data slots) for theta1, theta2 and omega.
# NOTE(review): post_theta1 / post_theta2 / post_omega are overwritten here,
# changing them from ggplot objects into numeric sample vectors.
# theta1
post_theta1 <- post_theta1$data
post_theta1 <- sample(post_theta1$theta1, size = 10000,
prob = post_theta1$post, replace = TRUE)
hist(post_theta1, col = 'gray', freq = FALSE,
xlab = expression(paste('p(', theta[1], '|D)')), main = '')# theta2
post_theta2 <- post_theta2$data
post_theta2 <- sample(post_theta2$theta2, size = 10000,
prob = post_theta2$post, replace = TRUE)
hist(post_theta2, col = 'gray', freq = FALSE,
xlab = expression(paste('p(', theta[2], '|D)')), main = '')# omega
post_omega <- post_omega$data
post_omega <- sample(post_omega$omega, size = 10000,
prob = post_omega$post, replace = TRUE)
hist(post_omega, col = 'firebrick', freq = FALSE,
xlab = expression(paste('p(', omega, '|D)')), main = '')推定値を求める。
# posterior mean
mean(post_theta1); mean(post_theta2); mean(post_omega)## [1] 0.268416
## [1] 0.637675
## [1] 0.465228
# MAP
mlv(post_theta1, method = 'naive', bw = 1/5)## Mode (most likely value): 0.29
## Bickel's modal skewness: -0.2245
## Call: mlv.default(x = post_theta1, bw = 1/5, method = "naive")
mlv(post_theta2, method = 'naive', bw = 1/5)## Mode (most likely value): 0.65
## Bickel's modal skewness: -0.0127
## Call: mlv.default(x = post_theta2, bw = 1/5, method = "naive")
mlv(post_omega, method = 'naive', bw = 1/5)## Mode (most likely value): 0.43
## Bickel's modal skewness: 0.1198
## Call: mlv.default(x = post_omega, bw = 1/5, method = "naive")
# 90% posterior interval
quantile(post_theta1, c(.05, .95))## 5% 95%
## 0.12 0.45
quantile(post_theta2, c(.05, .95))## 5% 95%
## 0.37 0.88
quantile(post_omega, c(.05, .95))## 5% 95%
## 0.16 0.79
# 90% HPDI
HPDinterval(as.mcmc(post_theta1), prob = .9)## lower upper
## var1 0.1 0.42
## attr(,"Probability")
## [1] 0.9
HPDinterval(as.mcmc(post_theta2), prob = .9)## lower upper
## var1 0.37 0.87
## attr(,"Probability")
## [1] 0.9
HPDinterval(as.mcmc(post_omega), prob = .9)## lower upper
## var1 0.14 0.76
## attr(,"Probability")
## [1] 0.9
まず、グリッドを作り、グリッド上の事前確率を求める。
# Build a 3-D grid over (theta1, theta2, omega) and evaluate the joint prior
# p(theta1, theta2, omega) = p(theta1 | omega) p(theta2 | omega) p(omega)
# on it. A coarser grid_gap than before keeps the 3-D grid tractable
# (101^3 ~ 1.03 million rows).
K <- 75 # concentration of Beta prior
A_omega <- B_omega <- 2 # shape parameters of hyperprior
grid_gap <- .01
df_grid4 <- expand.grid(theta1 = seq(0, 1, by = grid_gap),
theta2 = seq(0, 1, by = grid_gap),
omega = seq(0, 1, by = grid_gap)) %>%
mutate(p_theta1 = dbeta(theta1, # p(theta1 | omega)
shape1 = beta_shapes(omega, K)$shape1,
shape2 = beta_shapes(omega, K)$shape2),
p_theta2 = dbeta(theta2, # p(theta2 | omega)
shape1 = beta_shapes(omega, K)$shape1,
shape2 = beta_shapes(omega, K)$shape2),
p_omega = dbeta(omega, shape1 = A_omega, shape2 = B_omega), # p(omega)
prior = p_theta1 * p_theta2 * p_omega)
glimpse(df_grid4)## Observations: 1,030,301
## Variables: 7
## $ theta1 (dbl) 0.00, 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08,...
## $ theta2 (dbl) 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ omega (dbl) 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ p_theta1 (dbl) 7.400000e+01, 3.553047e+01, 1.693305e+01, 8.008816e+0...
## $ p_theta2 (dbl) 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 7...
## $ p_omega (dbl) 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ prior (dbl) 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
いくつかの周辺事前確率を図示する。
# Contour of the joint prior p(theta1, omega), theta2 summed out.
cont_prior1 <- df_grid4 %>%
  group_by(theta1, omega) %>%
  summarise(p_theta1_omega = sum(prior)) %>%
  ggplot(aes(theta1, omega, z = p_theta1_omega)) +
  geom_contour(color = 'royalblue') +
  labs(x = expression(theta[1]), y = expression(omega))
# Contour of the joint prior p(theta2, omega), theta1 summed out.
cont_prior2 <- df_grid4 %>%
  group_by(theta2, omega) %>%
  summarise(p_theta2_omega = sum(prior)) %>%
  ggplot(aes(theta2, omega, z = p_theta2_omega)) +
  geom_contour(color = 'royalblue') +
  labs(x = expression(theta[2]), y = expression(omega))
# Marginals
# Marginal prior of omega, p(omega): sum the joint prior over theta1/theta2,
# then rescale so it integrates to 1 on the omega grid (width grid_gap).
m_omega <- df_grid4 %>%
  group_by(omega) %>%
  summarize(m_p_omega = sum(prior)) %>%
  mutate(m_p_omega = m_p_omega / sum(grid_gap * m_p_omega)) %>%
  ggplot(aes(omega, m_p_omega)) +
  geom_line(color = 'royalblue') +
  # fixed typo in axis label: 'margnial' -> 'marginal'
  labs(x = expression(omega), y = expression(paste('marginal ', p(omega)))) +
  coord_flip()  # omega on the vertical axis, matching the contour plots
# Marginal prior p(theta1): sum out theta2 and omega, normalize on the grid.
m_theta1 <- df_grid4 %>%
  group_by(theta1) %>%
  summarise(m_p_theta1 = sum(prior)) %>%
  mutate(m_p_theta1 = m_p_theta1 / sum(grid_gap * m_p_theta1)) %>%
  ggplot(aes(x = theta1, y = m_p_theta1)) +
  geom_line(color = 'royalblue') +
  labs(x = expression(theta[1]), y = expression(paste('marginal ', p(theta[1]))))
# Marginal prior p(theta2): sum out theta1 and omega, normalize on the grid.
m_theta2 <- df_grid4 %>%
  group_by(theta2) %>%
  summarise(m_p_theta2 = sum(prior)) %>%
  mutate(m_p_theta2 = m_p_theta2 / sum(grid_gap * m_p_theta2)) %>%
  ggplot(aes(x = theta2, y = m_p_theta2)) +
  geom_line(color = 'royalblue') +
  labs(x = expression(theta[2]), y = expression(paste('marginal ', p(theta[2]))))
# Display in a single view
# Lay the prior contours and the marginal-prior curves out on a 2 x 3 grid.
grid.newpage() # create a blank display
pushViewport(viewport(layout = grid.layout(2, 3)))
print(cont_prior1, vp = viewport(layout.pos.row = 1, layout.pos.col = 1))
print(cont_prior2, vp = viewport(layout.pos.row = 1, layout.pos.col = 2))
print(m_omega, vp = viewport(layout.pos.row = 1, layout.pos.col = 3))
print(m_theta1, vp = viewport(layout.pos.row = 2, layout.pos.col = 1))
print(m_theta2, vp = viewport(layout.pos.row = 2, layout.pos.col = 2))尤度は先ほどと同じ。
# Bernoulli likelihoods on the grid. The exponents encode the observed data:
# coin 1 has 3 heads out of 15 flips, coin 2 has 4 heads out of 5 flips.
df_grid4 <- df_grid4 %>%
  mutate(lik1 = theta1^3 * (1 - theta1)^12,
         lik2 = theta2^4 * (1 - theta2)^1)
# 3D plot: the result doesn't appear on HTML
plot3d(df_grid4[, c('theta1', 'omega', 'lik1')], col = 'red')
plot3d(df_grid4[, c('theta2', 'omega', 'lik2')], col = 'red')
# Contour of the likelihood
# lik1 is constant in omega, so averaging within (theta1, omega) cells
# just collapses the theta2 dimension.
cont_lik1 <- df_grid4 %>%
  group_by(theta1, omega) %>%
  summarise(lik1 = mean(lik1)) %>%
  ggplot(aes(theta1, omega, z = lik1)) +
  geom_contour(color = 'red') +
  labs(x = expression(theta[1]), y = expression(omega))
# Likelihood contour for coin 2, collapsing the theta1 dimension.
cont_lik2 <- df_grid4 %>%
  group_by(theta2, omega) %>%
  summarise(lik2 = mean(lik2)) %>%
  ggplot(aes(theta2, omega, z = lik2)) +
  geom_contour(color = 'red') +
  labs(x = expression(theta[2]), y = expression(omega))
# Display in a single view
# Show the two likelihood contours side by side on a 1 x 3 grid layout.
grid.newpage() # create a blank display
pushViewport(viewport(layout = grid.layout(1, 3)))
print(cont_lik1, vp = viewport(layout.pos.row = 1, layout.pos.col = 1))
print(cont_lik2, vp = viewport(layout.pos.row = 1, layout.pos.col = 2))最後に、事後確率を求めて事後分布を図示する。
# Posterior on the grid: prior times both likelihoods, then normalized so
# the grid probabilities sum to 1.
df_grid4 <- df_grid4 %>%
  mutate(post = lik1 * lik2 * prior) %>%
  mutate(post = post / sum(post))
# Contour of the joint posterior p(theta1, omega | D), theta2 summed out.
cont_post1 <- df_grid4 %>%
  group_by(theta1, omega) %>%
  summarise(post = sum(post)) %>%
  ggplot(aes(theta1, omega, z = post)) +
  geom_contour(color = 'darkgreen') +
  labs(x = expression(theta[1]), y = expression(omega)) +
  xlim(0, 1) +
  ylim(0, 1)
# Contour of the joint posterior p(theta2, omega | D), theta1 summed out.
cont_post2 <- df_grid4 %>%
  group_by(theta2, omega) %>%
  summarise(post = sum(post)) %>%
  ggplot(aes(theta2, omega, z = post)) +
  geom_contour(color = 'darkgreen') +
  labs(x = expression(theta[2]), y = expression(omega)) +
  xlim(0, 1) +
  ylim(0, 1)
# Marginal posterior of omega, p(omega | D): sum the joint posterior over
# theta1/theta2, then rescale to integrate to 1 on the omega grid.
post_omega <- df_grid4 %>%
  group_by(omega) %>%
  summarize(post = sum(post)) %>%
  mutate(post = post / sum(grid_gap * post)) %>%
  ggplot(aes(omega, post)) +
  geom_line(color = 'darkgreen') +
  labs(x = expression(omega),
       # fixed typo in axis label: 'margnial' -> 'marginal'
       y = expression(paste('marginal ', 'p(', omega, '|D)'))) +
  coord_flip()  # omega on the vertical axis, matching the contour plots
# Marginal posterior p(theta1 | D), normalized to integrate to 1 on the grid.
post_theta1 <- df_grid4 %>%
  group_by(theta1) %>%
  summarise(post = sum(post)) %>%
  mutate(post = post / sum(grid_gap * post)) %>%
  ggplot(aes(x = theta1, y = post)) +
  geom_line(color = 'darkgreen') +
  labs(x = expression(theta[1]),
       y = expression(paste('marginal ', 'p(', theta[1], '|D)'))) +
  ylim(0, 4)
# Marginal posterior p(theta2 | D), normalized to integrate to 1 on the grid.
post_theta2 <- df_grid4 %>%
  group_by(theta2) %>%
  summarise(post = sum(post)) %>%
  mutate(post = post / sum(grid_gap * post)) %>%
  ggplot(aes(x = theta2, y = post)) +
  geom_line(color = 'darkgreen') +
  labs(x = expression(theta[2]),
       y = expression(paste('marginal ', 'p(', theta[2], '|D)'))) +
  ylim(0, 4)
# Display in a single view
# 2 x 3 grid: joint-posterior contours on top, marginal posteriors below.
grid.newpage() # create a blank display
pushViewport(viewport(layout = grid.layout(2, 3)))
print(cont_post1, vp = viewport(layout.pos.row = 1, layout.pos.col = 1))
print(cont_post2, vp = viewport(layout.pos.row = 1, layout.pos.col = 2))
print(post_omega, vp = viewport(layout.pos.row = 1, layout.pos.col = 3))
print(post_theta1, vp = viewport(layout.pos.row = 2, layout.pos.col = 1))
print(post_theta2, vp = viewport(layout.pos.row = 2, layout.pos.col = 2))この事後分布を使って、コインのバイアス\(\theta_1\), \(\theta_2\) と鋳造所のバイアス \(\omega\) について考察する。 そのために、\(\theta_s\)の周辺事後分布\(p(\theta_s|D)\)と\(\omega\)の周辺事後分布\(p(\omega|D)\)からそれぞれ10,000個の値を無作為に抽出し、そのサンプルを利用する。
# Draw 10,000 resamples from each grid-based marginal posterior.
# NOTE(review): each name first holds a ggplot object; `$data` recovers the
# summarized marginal data frame from the plot, and the same name is then
# overwritten with the sampled vector.
# theta1
post_theta1 <- post_theta1$data
post_theta1 <- sample(post_theta1$theta1, size = 10000,
prob = post_theta1$post, replace = TRUE)
hist(post_theta1, col = 'gray', freq = FALSE,
xlab = expression(paste('p(', theta[1], '|D)')), main = '')# theta2
post_theta2 <- post_theta2$data
post_theta2 <- sample(post_theta2$theta2, size = 10000,
prob = post_theta2$post, replace = TRUE)
hist(post_theta2, col = 'gray', freq = FALSE,
xlab = expression(paste('p(', theta[2], '|D)')), main = '')# omega
post_omega <- post_omega$data
post_omega <- sample(post_omega$omega, size = 10000,
prob = post_omega$post, replace = TRUE)
hist(post_omega, col = 'firebrick', freq = FALSE,
xlab = expression(paste('p(', omega, '|D)')), main = '')推定値を求める。
# posterior mean
mean(post_theta1); mean(post_theta2); mean(post_omega)## [1] 0.35719
## [1] 0.413584
## [1] 0.386632
# MAP
mlv(post_theta1, method = 'naive', bw = 1/5)## Mode (most likely value): 0.36
## Bickel's modal skewness: -0.047
## Call: mlv.default(x = post_theta1, bw = 1/5, method = "naive")
mlv(post_theta2, method = 'naive', bw = 1/5)## Mode (most likely value): 0.42
## Bickel's modal skewness: -0.0645
## Call: mlv.default(x = post_theta2, bw = 1/5, method = "naive")
mlv(post_omega, method = 'naive', bw = 1/5)## Mode (most likely value): 0.37
## Bickel's modal skewness: 0.0988
## Call: mlv.default(x = post_omega, bw = 1/5, method = "naive")
# 90% posterior interval
quantile(post_theta1, c(.05, .95))## 5% 95%
## 0.20 0.53
quantile(post_theta2, c(.05, .95))## 5% 95%
## 0.24 0.60
quantile(post_omega, c(.05, .95))## 5% 95%
## 0.21 0.57
# 90% HPDI
HPDinterval(as.mcmc(post_theta1), prob = .9)## lower upper
## var1 0.18 0.5
## attr(,"Probability")
## [1] 0.9
HPDinterval(as.mcmc(post_theta2), prob = .9)## lower upper
## var1 0.22 0.58
## attr(,"Probability")
## [1] 0.9
HPDinterval(as.mcmc(post_omega), prob = .9)## lower upper
## var1 0.2 0.55
## attr(,"Probability")
## [1] 0.9
\(\theta_s\)の\(\omega\)への依存度が低い場合に比べ、依存度が高い場合には、\(\theta_s\)の値のばらつきが小さくなる。
この場合の統計モデルは、次のように表現できる。 \[y_{i|s} \sim \mbox{Bernoulli}(\theta_s),\] \[\theta_s \sim \mbox{Beta}(a, b),\] \[a = \omega (\kappa - 2) + 1,\] \[b = (1 - \omega) (\kappa - 2) + 1,\] \[ \omega \sim \mbox{Beta}(A_\omega, B_\omega),\] \[\kappa \sim \mbox{Gamma}(S_\kappa, R_\kappa) + 2,\] \[S_\kappa = \mu_g^2 / \sigma_g^2, R_\kappa = \mu_g / \sigma_g^2.\] ただし、\(\mu_g\) と\(\sigma_g\) はそれぞれガンマ分布の期待値と標準偏差である。
この統計モデルをStan で推定してみよう。 Stanのモデルは次のように書ける。
# Stan model: hierarchical Bernoulli. One theta per subject with a common
# Beta(a, b) prior; a and b are reparameterized via the mode omega and the
# concentration kappa = kappa_minus_two + 2, both estimated from the data.
# NOTE(review): the Stan code uses the old `<-` assignment operator
# (deprecated in later Stan versions in favor of `=`), and "number or
# observations" is a typo for "number of observations"; both live inside the
# model string, so changing them would change the .stan file written below.
model_2 <- "
data {
int<lower=1> N; // total number or observations
int<lower=1> S; // number of subjects
int<lower=1,upper=S> s[N]; // subject ID
int<lower=0, upper=1> y[N]; // binary outcome
}
parameters {
real<lower=0, upper=1> theta[S];
real<lower=0, upper=1> omega;
real<lower=0> kappa_minus_two;
}
transformed parameters {
real<lower=0> a;
real<lower=0> b;
a <- omega * kappa_minus_two + 1;
b <- (1 - omega) * kappa_minus_two + 1;
}
model {
for (i in 1:N)
y[i] ~ bernoulli(theta[s[i]]);
for (j in 1:S)
theta[j] ~ beta(a, b);
omega ~ beta(1, 1);
kappa_minus_two ~ gamma(0.01, 0.01); // mean=1, sd=10
}
generated quantities {
real<lower=2> kappa;
kappa <- kappa_minus_two + 2;
}
"
# Write the model to disk so stan() can compile it from file later.
write(model_2, file = 'kruschke-ch09/model-2.stan')Kruschke (2014) の Therapeutic Touch (タッチ療法) Data (available here) を使い、28人の被験者(実際には21人で、そのうち7人は2回参加)が「エナジーフィールド (?)」を知覚できる確率を推定する。 各被験者は10回テストを受ける。
まず、各被験者の正解率を図にしてみよう。
TTD <- read.csv('DBDA2Eprograms/TherapeuticTouchData.csv')
glimpse(TTD)## Observations: 280
## Variables: 2
## $ y (int) 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, ...
## $ s (fctr) S01, S01, S01, S01, S01, S01, S01, S01, S01, S01, S02, S02,...
# Per-subject proportion of correct trials, shown as a bar chart.
correct_rate <- TTD %>%
group_by(s) %>%
summarize(correct = sum(y) / n()) %>%
ggplot(aes(x = correct)) + geom_bar() + xlim(0, 1) +
labs(x = 'Proportion Correct', y = '# Practitioners')
# Overall correct rate ignoring subjects (complete pooling); used later as
# the reference line in the shrinkage plot.
print(correct_rate)また、被験者の区別をしないとき、全体の正答率は、
(pooling <- with(TTD, sum(y)/nrow(TTD)))## [1] 0.4392857
ガンマ分布の期待値と標準偏差を1と10に設定し、Stanで推定を行う。
# Assemble the data list matching the Stan model's data block
# (N trials, S subjects, integer subject index, binary outcomes).
myd <- list(N = nrow(TTD), S = length(unique(TTD$s)),
s = as.numeric(TTD$s), y = TTD$y)
# Fit: 4 chains, 1,000 warmup + 3,000 sampling iterations each.
stanfit_m2 <- stan(file = 'kruschke-ch09/model-2.stan', data = myd,
chains = 4, warmup = 1000, iter = 4000)##
## SAMPLING FOR MODEL 'model-2' NOW (CHAIN 1).
##
## Chain 1, Iteration: 1 / 4000 [ 0%] (Warmup)
## Chain 1, Iteration: 400 / 4000 [ 10%] (Warmup)
## Chain 1, Iteration: 800 / 4000 [ 20%] (Warmup)
## Chain 1, Iteration: 1001 / 4000 [ 25%] (Sampling)
## Chain 1, Iteration: 1400 / 4000 [ 35%] (Sampling)
## Chain 1, Iteration: 1800 / 4000 [ 45%] (Sampling)
## Chain 1, Iteration: 2200 / 4000 [ 55%] (Sampling)
## Chain 1, Iteration: 2600 / 4000 [ 65%] (Sampling)
## Chain 1, Iteration: 3000 / 4000 [ 75%] (Sampling)
## Chain 1, Iteration: 3400 / 4000 [ 85%] (Sampling)
## Chain 1, Iteration: 3800 / 4000 [ 95%] (Sampling)
## Chain 1, Iteration: 4000 / 4000 [100%] (Sampling)#
## # Elapsed Time: 0.629126 seconds (Warm-up)
## # 2.13781 seconds (Sampling)
## # 2.76693 seconds (Total)
## #
##
## SAMPLING FOR MODEL 'model-2' NOW (CHAIN 2).
##
## Chain 2, Iteration: 1 / 4000 [ 0%] (Warmup)
## Chain 2, Iteration: 400 / 4000 [ 10%] (Warmup)
## Chain 2, Iteration: 800 / 4000 [ 20%] (Warmup)
## Chain 2, Iteration: 1001 / 4000 [ 25%] (Sampling)
## Chain 2, Iteration: 1400 / 4000 [ 35%] (Sampling)
## Chain 2, Iteration: 1800 / 4000 [ 45%] (Sampling)
## Chain 2, Iteration: 2200 / 4000 [ 55%] (Sampling)
## Chain 2, Iteration: 2600 / 4000 [ 65%] (Sampling)
## Chain 2, Iteration: 3000 / 4000 [ 75%] (Sampling)
## Chain 2, Iteration: 3400 / 4000 [ 85%] (Sampling)
## Chain 2, Iteration: 3800 / 4000 [ 95%] (Sampling)
## Chain 2, Iteration: 4000 / 4000 [100%] (Sampling)#
## # Elapsed Time: 0.704076 seconds (Warm-up)
## # 1.578 seconds (Sampling)
## # 2.28208 seconds (Total)
## #
##
## SAMPLING FOR MODEL 'model-2' NOW (CHAIN 3).
##
## Chain 3, Iteration: 1 / 4000 [ 0%] (Warmup)
## Chain 3, Iteration: 400 / 4000 [ 10%] (Warmup)
## Chain 3, Iteration: 800 / 4000 [ 20%] (Warmup)
## Chain 3, Iteration: 1001 / 4000 [ 25%] (Sampling)
## Chain 3, Iteration: 1400 / 4000 [ 35%] (Sampling)
## Chain 3, Iteration: 1800 / 4000 [ 45%] (Sampling)
## Chain 3, Iteration: 2200 / 4000 [ 55%] (Sampling)
## Chain 3, Iteration: 2600 / 4000 [ 65%] (Sampling)
## Chain 3, Iteration: 3000 / 4000 [ 75%] (Sampling)
## Chain 3, Iteration: 3400 / 4000 [ 85%] (Sampling)
## Chain 3, Iteration: 3800 / 4000 [ 95%] (Sampling)
## Chain 3, Iteration: 4000 / 4000 [100%] (Sampling)#
## # Elapsed Time: 0.750791 seconds (Warm-up)
## # 1.94057 seconds (Sampling)
## # 2.69136 seconds (Total)
## #
##
## SAMPLING FOR MODEL 'model-2' NOW (CHAIN 4).
##
## Chain 4, Iteration: 1 / 4000 [ 0%] (Warmup)
## Chain 4, Iteration: 400 / 4000 [ 10%] (Warmup)
## Chain 4, Iteration: 800 / 4000 [ 20%] (Warmup)
## Chain 4, Iteration: 1001 / 4000 [ 25%] (Sampling)
## Chain 4, Iteration: 1400 / 4000 [ 35%] (Sampling)
## Chain 4, Iteration: 1800 / 4000 [ 45%] (Sampling)
## Chain 4, Iteration: 2200 / 4000 [ 55%] (Sampling)
## Chain 4, Iteration: 2600 / 4000 [ 65%] (Sampling)
## Chain 4, Iteration: 3000 / 4000 [ 75%] (Sampling)
## Chain 4, Iteration: 3400 / 4000 [ 85%] (Sampling)
## Chain 4, Iteration: 3800 / 4000 [ 95%] (Sampling)
## Chain 4, Iteration: 4000 / 4000 [100%] (Sampling)#
## # Elapsed Time: 0.755667 seconds (Warm-up)
## # 1.53749 seconds (Sampling)
## # 2.29315 seconds (Total)
## #
print(stanfit_m2, pars = c('theta', 'omega'), probs = c(.05, .5, .95))## Inference for Stan model: model-2.
## 4 chains, each with iter=4000; warmup=1000; thin=1;
## post-warmup draws per chain=3000, total post-warmup draws=12000.
##
## mean se_mean sd 5% 50% 95% n_eff Rhat
## theta[1] 0.36 0 0.09 0.20 0.37 0.49 1971 1
## theta[2] 0.38 0 0.08 0.24 0.39 0.51 2893 1
## theta[3] 0.41 0 0.08 0.27 0.41 0.54 12000 1
## theta[4] 0.41 0 0.08 0.27 0.41 0.54 4377 1
## theta[5] 0.41 0 0.08 0.27 0.41 0.53 4716 1
## theta[6] 0.41 0 0.08 0.27 0.41 0.54 12000 1
## theta[7] 0.41 0 0.08 0.27 0.41 0.54 4885 1
## theta[8] 0.41 0 0.08 0.27 0.41 0.53 4728 1
## theta[9] 0.41 0 0.08 0.27 0.41 0.54 4560 1
## theta[10] 0.41 0 0.08 0.27 0.41 0.54 4957 1
## theta[11] 0.43 0 0.08 0.30 0.43 0.56 12000 1
## theta[12] 0.43 0 0.08 0.30 0.43 0.56 12000 1
## theta[13] 0.43 0 0.08 0.30 0.43 0.57 12000 1
## theta[14] 0.43 0 0.08 0.30 0.43 0.56 6541 1
## theta[15] 0.43 0 0.08 0.30 0.43 0.56 12000 1
## theta[16] 0.45 0 0.08 0.33 0.45 0.59 12000 1
## theta[17] 0.45 0 0.08 0.33 0.45 0.59 12000 1
## theta[18] 0.45 0 0.08 0.33 0.45 0.59 12000 1
## theta[19] 0.45 0 0.08 0.33 0.45 0.59 6768 1
## theta[20] 0.45 0 0.08 0.33 0.45 0.59 12000 1
## theta[21] 0.45 0 0.08 0.33 0.45 0.59 12000 1
## theta[22] 0.45 0 0.08 0.33 0.45 0.59 6993 1
## theta[23] 0.48 0 0.08 0.35 0.47 0.62 4881 1
## theta[24] 0.48 0 0.08 0.35 0.47 0.62 5215 1
## theta[25] 0.50 0 0.09 0.37 0.49 0.66 3679 1
## theta[26] 0.50 0 0.09 0.37 0.49 0.66 3343 1
## theta[27] 0.50 0 0.09 0.37 0.49 0.66 3567 1
## theta[28] 0.52 0 0.09 0.39 0.51 0.70 2355 1
## omega 0.44 0 0.04 0.37 0.44 0.50 2285 1
##
## Samples were drawn using NUTS(diag_e) at Thu Jun 23 22:28:06 2016.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
plot(stanfit_m2, pars = c('theta', 'omega'))traceplot(stanfit_m2, pars = c('theta', 'omega'))Kruschke (2014) p.243 と同じ図を(1つずつ)表示する。 まず、サンプルを取り出す。
post_stan2 <- as.data.frame(extract(stanfit_m2, pars = c('theta', 'omega', 'kappa')))
plot_stan2 <- ggplot(post_stan2)\(\kappa\) の事後分布。
plot_stan2 + geom_histogram(aes(kappa), color = 'black')# MAP
mlv(post_stan2$kappa, method = 'naive', bw = 1/5)## Mode (most likely value): 15.57604
## Bickel's modal skewness: 0.7085833
## Call: mlv.default(x = post_stan2$kappa, bw = 1/5, method = "naive")
# 95% HPDI
HPDinterval(as.mcmc(as.vector(post_stan2$kappa)), prob = .95)## lower upper
## var1 3.754442 161.0809
## attr(,"Probability")
## [1] 0.95
\(\theta_1\) の事後分布。
(theta_1 <- plot_stan2 + geom_histogram(aes(theta.1), color = 'black'))# MAP
mlv(post_stan2$theta.1, method = 'naive', bw = 1/5)## Mode (most likely value): 0.348311
## Bickel's modal skewness: 0.1896667
## Call: mlv.default(x = post_stan2$theta.1, bw = 1/5, method = "naive")
# 95% HPDI
HPDinterval(as.mcmc(as.vector(post_stan2$theta.1)), prob = .95)## lower upper
## var1 0.1775063 0.5314088
## attr(,"Probability")
## [1] 0.95
\(\theta_{14}\) の事後分布。
(theta_14 <- plot_stan2 + geom_histogram(aes(theta.14), color = 'black'))# MAP
mlv(post_stan2$theta.14, method = 'naive', bw = 1/5)## Mode (most likely value): 0.4312726
## Bickel's modal skewness: -0.01883333
## Call: mlv.default(x = post_stan2$theta.14, bw = 1/5, method = "naive")
# 95% HPDI
HPDinterval(as.mcmc(as.vector(post_stan2$theta.14)), prob = .95)## lower upper
## var1 0.2751831 0.5892203
## attr(,"Probability")
## [1] 0.95
\(\theta_{28}\) の事後分布。
(theta_28 <- plot_stan2 + geom_histogram(aes(theta.28), color = 'black'))# MAP
mlv(post_stan2$theta.28, method = 'naive', bw = 1/5)## Mode (most likely value): 0.5503598
## Bickel's modal skewness: -0.3043333
## Call: mlv.default(x = post_stan2$theta.28, bw = 1/5, method = "naive")
# 95% HPDI
HPDinterval(as.mcmc(as.vector(post_stan2$theta.28)), prob = .95)## lower upper
## var1 0.3602558 0.7255168
## attr(,"Probability")
## [1] 0.95
\(\theta_1 - \theta_{14}\) の事後分布。
(theta_1_14 <- plot_stan2 + geom_histogram(aes(theta.1 - theta.14), color = 'black'))scat_1_14 <- plot_stan2 +
geom_point(aes(x = theta.1, y = theta.14), alpha = 1/3) +
geom_abline(slope = 1, linetype = 'dashed', color = 'firebrick')
# MAP
mlv(post_stan2$theta.1 - post_stan2$theta.14, method = 'naive', bw = 1/5)## Mode (most likely value): -0.07694172
## Bickel's modal skewness: 0.1368333
## Call: mlv.default(x = post_stan2$theta.1 - post_stan2$theta.14, bw = 1/5, method = "naive")
# 95% HPDI
HPDinterval(as.mcmc(as.vector(post_stan2$theta.1) - as.vector(post_stan2$theta.14)), prob = .95)## lower upper
## var1 -0.3026925 0.1454397
## attr(,"Probability")
## [1] 0.95
\(\theta_1 - \theta_{28}\) の事後分布。
(theta_1_28 <- plot_stan2 + geom_histogram(aes(theta.1 - theta.28), color = 'black'))scat_1_28 <- plot_stan2 +
geom_point(aes(x = theta.1, y = theta.28), alpha = 1/3) +
geom_abline(slope = 1, linetype = 'dashed', color = 'firebrick')
# MAP
mlv(post_stan2$theta.1 - post_stan2$theta.28, method = 'naive', bw = 1/5)## Mode (most likely value): -0.1572661
## Bickel's modal skewness: 0.0935
## Call: mlv.default(x = post_stan2$theta.1 - post_stan2$theta.28, bw = 1/5, method = "naive")
# 95% HPDI
HPDinterval(as.mcmc(as.vector(post_stan2$theta.1) - as.vector(post_stan2$theta.28)), prob = .95)## lower upper
## var1 -0.4526814 0.08427892
## attr(,"Probability")
## [1] 0.95
\(\theta_{14} - \theta_{28}\) の事後分布。
(theta_14_28 <- plot_stan2 + geom_histogram(aes(theta.14 - theta.28), color = 'black'))scat_14_28 <- plot_stan2 +
geom_point(aes(x = theta.14, y = theta.28), alpha = 1/3) +
geom_abline(slope = 1, linetype = 'dashed', color = 'firebrick')
# MAP
mlv(post_stan2$theta.14 - post_stan2$theta.28, method = 'naive', bw = 1/5)## Mode (most likely value): -0.1040533
## Bickel's modal skewness: 0.1653333
## Call: mlv.default(x = post_stan2$theta.14 - post_stan2$theta.28, bw = 1/5, method = "naive")
# 95% HPDI
HPDinterval(as.mcmc(as.vector(post_stan2$theta.14) - as.vector(post_stan2$theta.28)), prob = .95)## lower upper
## var1 -0.3655969 0.1097232
## attr(,"Probability")
## [1] 0.95
Figure 9.10 のように並べる(\(\theta_s\) のみ)。
# Display in a single view
grid.newpage() # create a blank display
pushViewport(viewport(layout = grid.layout(3, 3)))
print(theta_1, vp = viewport(layout.pos.row = 1, layout.pos.col = 1))
print(theta_1_14, vp = viewport(layout.pos.row = 1, layout.pos.col = 2))
print(theta_1_28, vp = viewport(layout.pos.row = 1, layout.pos.col = 3))
print(scat_1_14, vp = viewport(layout.pos.row = 2, layout.pos.col = 1))
print(theta_14, vp = viewport(layout.pos.row = 2, layout.pos.col = 2))
print(theta_14_28, vp = viewport(layout.pos.row = 2, layout.pos.col = 3))
print(scat_1_28, vp = viewport(layout.pos.row = 3, layout.pos.col = 1))
print(scat_14_28, vp = viewport(layout.pos.row = 3, layout.pos.col = 2))
print(theta_28, vp = viewport(layout.pos.row = 3, layout.pos.col = 3))\(\omega\) の事後分布。
plot_stan2 + geom_histogram(aes(omega), color = 'black') +
geom_vline(xintercept = .5, color = 'red')# MAP
mlv(post_stan2$omega, method = 'naive', bw = 1/5)## Mode (most likely value): 0.4281444
## Bickel's modal skewness: 0.1626667
## Call: mlv.default(x = post_stan2$omega, bw = 1/5, method = "naive")
# 95% HPDI
HPDinterval(as.mcmc(as.vector(post_stan2$omega)), prob = .95)## lower upper
## var1 0.3636348 0.5133294
## attr(,"Probability")
## [1] 0.95
この結果を見る限り、「エナジーフィールド」は判別できないようである。
階層モデル (partial pooling) で推定された正答率\(\theta_s\)は、各被験者ごとに別々に推定されたもの (no pooling) の値よりも、被験者の違いを無視してデータをプールして得られた推定値 (pooling) に近づく。この性質をshrinkageと呼ぶ。
Shrinkage を図示してみる。
# Compare per-subject estimates: `separate` = raw per-subject correct rate
# (from the correct_rate plot's data), `hierarchical` = posterior mean of
# theta[s] from the Stan fit (columns 1:28 of post_stan2 are the thetas).
shrinkage <- correct_rate$data
shrinkage$post_means <- sapply(post_stan2[, 1:28], mean)
names(shrinkage) <- c('subject', 'separate', 'hierarchical')
# Long format so both estimate types can be colored in one plot.
shrinkage_long <- shrinkage %>%
gather(key = method, value = estimate, separate:hierarchical)
# Dashed line marks the complete-pooling estimate computed earlier.
plt_shrinkage <- ggplot(shrinkage_long,
aes(x = subject, y = estimate, color = method)) +
geom_point() +
geom_hline(yintercept = pooling, linetype = 'dashed') +
coord_flip()
print(plt_shrinkage)この図の点線は、pooling で得られた推定値である。階層モデルの推定値は、separate (no pooling) の推定値よりもpooling 寄りになっていることがわかる。
いまのところこの勉強会でJAGSを使う予定はないのでスキップする。
この問題の統計モデルは、次のように書ける。 \[z_{s|c} = \sum_{i=1|s}^{n_s} y_i \sim \mbox{Binomial}(n_s, \theta_{s|c}),\] \[\theta_{s|c} \sim \mbox{Beta}(a_c, b_c),\] \[a_c = \omega_c (\kappa_c - 2) + 1,\] \[b_c = (1 - \omega_c) (\kappa_c - 2) + 1,\] \[\omega_c \sim \mbox{Beta}(a_0, b_0),\] \[a_0 = \omega_0 (\kappa_0 - 2) + 1,\] \[b_0 = (1 - \omega_0) (\kappa_0 - 2) + 1,\] \[\kappa_c - 2 \sim \mbox{Gamma}(S, R),\] \[\omega_0 \sim \mbox{Beta}(A, B),\] \[\kappa_0 -2 \sim \mbox{Gamma}(S_0, R_0).\] ただし、下付きのs はsubject(各選手)、cはcluster(各ポジション)を表す。
\(S=S_0=R=R_0=0.01\)(つまり、ガンマ分布の平均と標準偏差をそれぞれ1と10にする)、\(A=B=1\)(すなわち、\(\omega_0\)の事前分布を一様分布)にすると、このモデルは次のStanモデルで推定できる。
# Stan model: two-level hierarchy. Each subject's theta comes from a
# category-level Beta(a[c], b[c]); each category's mode omega[c] comes from a
# top-level Beta(a_0, b_0). Concentrations are kappa = kappa_minus_two + 2.
# NOTE(review): like model_2, the Stan code uses the old `<-` assignment,
# deprecated in later Stan versions; it lives inside the model string, so
# changing it would change the .stan file written below.
model_3 <- "
data {
int<lower=1> S; // n. of subjects
int<lower=1> n[S]; // n. of trials for s
int<lower=0> z[S]; // n. of successes for s
int<lower=1> C; // n. of categories
int<lower=1, upper=C> c[S]; // category of s
}
parameters {
real<lower=0, upper=1> theta[S];
real<lower=0, upper=1> omega[C];
real<lower=0> kappa_minus_two[C];
real<lower=0, upper=1> omega_0;
real<lower=0> kappa_0_minus_two;
}
transformed parameters {
real<lower=0> a[C];
real<lower=0> b[C];
real<lower=0> a_0;
real<lower=0> b_0;
for (j in 1:C) {
a[j] <- omega[j] * kappa_minus_two[j] + 1;
b[j] <- (1 - omega[j]) * kappa_minus_two[j] + 1;
}
a_0 <- omega_0 * kappa_0_minus_two + 1;
b_0 <- (1 - omega_0) * kappa_0_minus_two + 1;
}
model {
for (s in 1:S){
z[s] ~ binomial(n[s], theta[s]);
theta[s] ~ beta(a[c[s]], b[c[s]]);
}
for (j in 1:C) {
omega[j] ~ beta(a_0, b_0);
kappa_minus_two[j] ~ gamma(0.01, 0.01); // mean=1, sd=10
}
omega_0 ~ beta(1, 1); // non-informative
kappa_0_minus_two ~ gamma(0.01, 0.01); // mean=1, sd=10
}
generated quantities {
real<lower=2> kappa[C];
real<lower=2> kappa_0;
for (j in 1:C)
kappa[j] <- kappa_minus_two[j] + 2;
kappa_0 <- kappa_0_minus_two + 2;
}
"
# Write the model to disk so stan() can compile it from file later.
write(model_3, file = 'kruschke-ch09/model-3.stan')Kruschke (2014) が提供する BattingAverage.csv のデータを使って推定を行ってみよう。
BA <- read.csv('DBDA2Eprograms/BattingAverage.csv')
glimpse(BA)## Observations: 948
## Variables: 6
## $ Player (fctr) Fernando Abad, Bobby Abreu, Tony Abreu, Dustin A...
## $ PriPos (fctr) Pitcher, Left Field, 2nd Base, 2nd Base, 1st Bas...
## $ Hits (int) 1, 53, 18, 137, 21, 0, 0, 2, 150, 167, 0, 128, 66...
## $ AtBats (int) 7, 219, 70, 607, 86, 1, 1, 20, 549, 576, 1, 525, ...
## $ PlayerNumber (int) 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15...
## $ PriPosNumber (int) 1, 7, 4, 4, 3, 1, 1, 3, 3, 4, 1, 5, 4, 2, 7, 4, 6...
# extract necessary data
# Data list matching model-3's data block: one binomial record per player,
# with primary position as the category index.
myd <- list(S = nrow(BA), n = BA$AtBats, z = BA$Hits,
C = length(unique(BA$PriPos)), c = BA$PriPosNumber)
# Run Stan
# Fit: 4 chains, 1,000 warmup + 3,000 sampling iterations each.
stanfit_m3 <- stan(file = 'kruschke-ch09/model-3.stan', data = myd,
chains = 4, warmup = 1000, iter = 4000)##
## SAMPLING FOR MODEL 'model-3' NOW (CHAIN 1).
##
## Chain 1, Iteration: 1 / 4000 [ 0%] (Warmup)
## Chain 1, Iteration: 400 / 4000 [ 10%] (Warmup)
## Chain 1, Iteration: 800 / 4000 [ 20%] (Warmup)
## Chain 1, Iteration: 1001 / 4000 [ 25%] (Sampling)
## Chain 1, Iteration: 1400 / 4000 [ 35%] (Sampling)
## Chain 1, Iteration: 1800 / 4000 [ 45%] (Sampling)
## Chain 1, Iteration: 2200 / 4000 [ 55%] (Sampling)
## Chain 1, Iteration: 2600 / 4000 [ 65%] (Sampling)
## Chain 1, Iteration: 3000 / 4000 [ 75%] (Sampling)
## Chain 1, Iteration: 3400 / 4000 [ 85%] (Sampling)
## Chain 1, Iteration: 3800 / 4000 [ 95%] (Sampling)
## Chain 1, Iteration: 4000 / 4000 [100%] (Sampling)#
## # Elapsed Time: 56.3271 seconds (Warm-up)
## # 93.3811 seconds (Sampling)
## # 149.708 seconds (Total)
## #
##
## SAMPLING FOR MODEL 'model-3' NOW (CHAIN 2).
##
## Chain 2, Iteration: 1 / 4000 [ 0%] (Warmup)
## Chain 2, Iteration: 400 / 4000 [ 10%] (Warmup)
## Chain 2, Iteration: 800 / 4000 [ 20%] (Warmup)
## Chain 2, Iteration: 1001 / 4000 [ 25%] (Sampling)
## Chain 2, Iteration: 1400 / 4000 [ 35%] (Sampling)
## Chain 2, Iteration: 1800 / 4000 [ 45%] (Sampling)
## Chain 2, Iteration: 2200 / 4000 [ 55%] (Sampling)
## Chain 2, Iteration: 2600 / 4000 [ 65%] (Sampling)
## Chain 2, Iteration: 3000 / 4000 [ 75%] (Sampling)
## Chain 2, Iteration: 3400 / 4000 [ 85%] (Sampling)
## Chain 2, Iteration: 3800 / 4000 [ 95%] (Sampling)
## Chain 2, Iteration: 4000 / 4000 [100%] (Sampling)#
## # Elapsed Time: 33.8466 seconds (Warm-up)
## # 92.2963 seconds (Sampling)
## # 126.143 seconds (Total)
## #
##
## SAMPLING FOR MODEL 'model-3' NOW (CHAIN 3).
##
## Chain 3, Iteration: 1 / 4000 [ 0%] (Warmup)
## Chain 3, Iteration: 400 / 4000 [ 10%] (Warmup)
## Chain 3, Iteration: 800 / 4000 [ 20%] (Warmup)
## Chain 3, Iteration: 1001 / 4000 [ 25%] (Sampling)
## Chain 3, Iteration: 1400 / 4000 [ 35%] (Sampling)
## Chain 3, Iteration: 1800 / 4000 [ 45%] (Sampling)
## Chain 3, Iteration: 2200 / 4000 [ 55%] (Sampling)
## Chain 3, Iteration: 2600 / 4000 [ 65%] (Sampling)
## Chain 3, Iteration: 3000 / 4000 [ 75%] (Sampling)
## Chain 3, Iteration: 3400 / 4000 [ 85%] (Sampling)
## Chain 3, Iteration: 3800 / 4000 [ 95%] (Sampling)
## Chain 3, Iteration: 4000 / 4000 [100%] (Sampling)#
## # Elapsed Time: 34.2563 seconds (Warm-up)
## # 72.6535 seconds (Sampling)
## # 106.91 seconds (Total)
## #
##
## SAMPLING FOR MODEL 'model-3' NOW (CHAIN 4).
##
## Chain 4, Iteration: 1 / 4000 [ 0%] (Warmup)
## Chain 4, Iteration: 400 / 4000 [ 10%] (Warmup)
## Chain 4, Iteration: 800 / 4000 [ 20%] (Warmup)
## Chain 4, Iteration: 1001 / 4000 [ 25%] (Sampling)
## Chain 4, Iteration: 1400 / 4000 [ 35%] (Sampling)
## Chain 4, Iteration: 1800 / 4000 [ 45%] (Sampling)
## Chain 4, Iteration: 2200 / 4000 [ 55%] (Sampling)
## Chain 4, Iteration: 2600 / 4000 [ 65%] (Sampling)
## Chain 4, Iteration: 3000 / 4000 [ 75%] (Sampling)
## Chain 4, Iteration: 3400 / 4000 [ 85%] (Sampling)
## Chain 4, Iteration: 3800 / 4000 [ 95%] (Sampling)
## Chain 4, Iteration: 4000 / 4000 [100%] (Sampling)#
## # Elapsed Time: 60.0251 seconds (Warm-up)
## # 48.4987 seconds (Sampling)
## # 108.524 seconds (Total)
## #
\(\hat{R}\)とトレースプロットを確認する。
stan_rhat(stanfit_m3)## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
traceplot(stanfit_m3, pars = c('omega'))# theta は数が多いので省略推定値をプロットする。
plot(stanfit_m3, pars = c('omega')) # average for each position## ci_level: 0.8 (80% intervals)
## outer_level: 0.95 (95% intervals)
plot(stanfit_m3, pars = c('theta')) # average for each player## ci_level: 0.8 (80% intervals)
## outer_level: 0.95 (95% intervals)
ポジションごとの打率の推定値を見てみよう。 ポジションとそれに対応する数字は以下の通り。
# Observed batting average per position (pooled hits / pooled at-bats),
# with the position's numeric code and player count, sorted ascending.
BA %>% group_by(PriPos) %>%
summarize(PriPosNumber = PriPosNumber[1],
n_players = n(),
average = sum(Hits)/sum(AtBats)) %>%
arrange(average)## Source: local data frame [9 x 4]
##
## PriPos PriPosNumber n_players average
## <fctr> <int> <int> <dbl>
## 1 Pitcher 1 324 0.1291476
## 2 Catcher 2 103 0.2474045
## 3 Shortstop 6 63 0.2551858
## 4 2nd Base 4 72 0.2556755
## 5 1st Base 3 81 0.2588514
## 6 Left Field 7 103 0.2590765
## 7 Center Field 8 67 0.2635128
## 8 Right Field 9 60 0.2635553
## 9 3rd Base 5 75 0.2650357
ピッチャーとキャッチャーの打率を比べる。
post_hit <- as.data.frame(extract(stanfit_m3, pars = c('omega')))
# MAP
mlv(post_hit$omega.1, method = 'naive', bw = 1/5) # pitcher## Mode (most likely value): 0.1219654
## Bickel's modal skewness: 0.001333333
## Call: mlv.default(x = post_hit$omega.1, bw = 1/5, method = "naive")
mlv(post_hit$omega.2, method = 'naive', bw = 1/5) # catcher## Mode (most likely value): 0.2370388
## Bickel's modal skewness: 0.0045
## Call: mlv.default(x = post_hit$omega.2, bw = 1/5, method = "naive")
# 95% HPDI
HPDinterval(as.mcmc(as.vector(post_hit$omega.1)), prob = .95)## lower upper
## var1 0.1089533 0.1342195
## attr(,"Probability")
## [1] 0.95
HPDinterval(as.mcmc(as.vector(post_hit$omega.2)), prob = .95)## lower upper
## var1 0.2268633 0.2477407
## attr(,"Probability")
## [1] 0.95
# graphs
# Posterior comparison of pitchers (omega.1) vs catchers (omega.2):
# marginal histograms, histogram of the difference, and a scatter plot with
# the identity line for reference.
hist_pitcher <- ggplot(post_hit, aes(x = omega.1)) +
  geom_histogram(color = 'black') +
  labs(x = "Pitchers' Batting Average")
hist_catcher <- ggplot(post_hit, aes(x = omega.2)) +
  geom_histogram(color = 'black') +
  labs(x = "Catchers' Batting Average")
hist_pit_cat <- ggplot(post_hit, aes(x = omega.1 - omega.2)) +
  geom_histogram(color = 'gray', fill = 'royalblue') +
  # fixed typo in axis label: 'Pitcer' -> 'Pitcher'
  labs(x = 'Difference in Batting Average: Pitcher - Catcher')
scat_pit_cat <- ggplot(post_hit, aes(omega.1, omega.2)) +
  geom_point(color = 'royalblue') +
  labs(x = 'Pitcher', y = 'Catcher') +
  geom_abline(slope = 1, linetype = 'dashed')
grid.newpage() # create a blank display
pushViewport(viewport(layout = grid.layout(2, 2)))
print(hist_pitcher, vp = viewport(layout.pos.row = 1, layout.pos.col = 1))
print(hist_pit_cat, vp = viewport(layout.pos.row = 1, layout.pos.col = 2))
print(scat_pit_cat, vp = viewport(layout.pos.row = 2, layout.pos.col = 1))
print(hist_catcher, vp = viewport(layout.pos.row = 2, layout.pos.col = 2))キャッチャーと1塁手の打率を比べる。
# MAP (posterior mode) estimates: catcher (omega.2) vs. 1st base (omega.3)
mlv(post_hit$omega.2, method = 'naive', bw = 1/5) # catcher## Mode (most likely value): 0.2370388
## Bickel's modal skewness: 0.0045
## Call: mlv.default(x = post_hit$omega.2, bw = 1/5, method = "naive")
mlv(post_hit$omega.3, method = 'naive', bw = 1/5) # 1st base## Mode (most likely value): 0.2515569
## Bickel's modal skewness: 0.01833333
## Call: mlv.default(x = post_hit$omega.3, bw = 1/5, method = "naive")
# 95% HPDI for each position's omega
HPDinterval(as.mcmc(as.vector(post_hit$omega.2)), prob = .95)## lower upper
## var1 0.2268633 0.2477407
## attr(,"Probability")
## [1] 0.95
HPDinterval(as.mcmc(as.vector(post_hit$omega.3)), prob = .95)## lower upper
## var1 0.2419242 0.2611729
## attr(,"Probability")
## [1] 0.95
# graphs: same 2x2 layout as the pitcher/catcher comparison.
# NOTE(review): hist_catcher is defined in the previous chunk and reused here.
hist_1st <- ggplot(post_hit, aes(x = omega.3)) +
geom_histogram(color = 'black') +
labs(x = "1st Basemen's Batting Average")
hist_cat_1st <- ggplot(post_hit, aes(x = omega.2 - omega.3)) +
geom_histogram(color = 'gray', fill = 'royalblue') +
labs(x = 'Difference in Batting Average: Catcher - 1st-Base')
scat_cat_1st <- ggplot(post_hit, aes(omega.2, omega.3)) +
geom_point(color = 'royalblue') +
labs(x = 'Catcher', y = '1st Base') +
geom_abline(slope = 1, linetype = 'dashed')
grid.newpage() # create a blank display
pushViewport(viewport(layout = grid.layout(2, 2)))
print(hist_catcher, vp = viewport(layout.pos.row = 1, layout.pos.col = 1))
print(hist_cat_1st, vp = viewport(layout.pos.row = 1, layout.pos.col = 2))
print(scat_cat_1st, vp = viewport(layout.pos.row = 2, layout.pos.col = 1))
print(hist_1st, vp = viewport(layout.pos.row = 2, layout.pos.col = 2))各ポジションの打率の事後分布。
# Character vector of position names, ordered by their numeric
# position code (PriPosNumber), so positions[i] matches omega.i.
positions <- BA %>%
group_by(PriPos) %>%
summarize(pos = mean(PriPosNumber)) %>%
arrange(pos) %>%
with(as.character(PriPos))
# One posterior histogram of omega per position, labelled with the
# position name; a shared x-range makes the panels visually comparable.
hist_gg <- ggplot(post_hit)
# Size the list from positions instead of hard-coding 9, and iterate with
# seq_along(): stays correct if the number of positions ever changes.
hist_positions <- vector('list', length(positions))
for (i in seq_along(positions)) {
hist_positions[[i]] <- hist_gg +
geom_histogram(aes_string(x = paste0('omega.', i)), binwidth = 0.005) +
xlim(.05, .3) +
xlab(positions[i])
}
# Display the nine per-position histograms on a 3x3 grid,
# filling row by row in PriPosNumber order.
grid.newpage() # create a blank display
pushViewport(viewport(layout = grid.layout(3, 3)))
counter <- 1
for (i in 1:3) {
for (j in 1:3) {
print(hist_positions[[counter]], vp = viewport(layout.pos.row = i, layout.pos.col = j))
counter <- counter + 1
}
}イチローの打率の推定結果。
# Ichiro Suzuki: look up his row to find his player index.
BA %>% filter(Player == 'Ichiro Suzuki')## Player PriPos Hits AtBats PlayerNumber PriPosNumber
## 1 Ichiro Suzuki Right Field 178 629 844 9
# Posterior draws of theta for player 844 (PlayerNumber from the row above).
ichiro <- rstan::extract(stanfit_m3, par = 'theta')$theta[, 844]
hist(ichiro, freq = FALSE, col = 'skyblue', main = '')# MAP
mlv(ichiro, method = 'naive', bw = 1/5)## Mode (most likely value): 0.2743412
## Bickel's modal skewness: -0.019
## Call: mlv.default(x = ichiro, bw = 1/5, method = "naive")
# 95% HPDI
HPDinterval(as.mcmc(ichiro), prob = .95)## lower upper
## var1 0.2462723 0.3022922
## attr(,"Probability")
## [1] 0.95
ちなみに、イチローのこの年(2012年)の実際の打率は 0.2829889 である。
青木宣親の打率の推定結果。
# Norichika Aoki: look up his row to find his player index.
BA %>% filter(Player == 'Norichika Aoki')## Player PriPos Hits AtBats PlayerNumber PriPosNumber
## 1 Norichika Aoki Right Field 150 520 19 9
# Posterior draws of theta for player 19 (PlayerNumber from the row above).
aoki <- rstan::extract(stanfit_m3, par = 'theta')$theta[, 19]
hist(aoki, freq = FALSE, col = 'firebrick', main = '')# MAP
mlv(aoki, method = 'naive', bw = 1/5) ## Mode (most likely value): 0.2764435
## Bickel's modal skewness: -0.01733333
## Call: mlv.default(x = aoki, bw = 1/5, method = "naive")
# 95% HPDI
HPDinterval(as.mcmc(aoki), prob = .95)## lower upper
## var1 0.2473656 0.3069499
## attr(,"Probability")
## [1] 0.95
ちなみに、青木のこの年(2012年)の実際の打率は 0.2884615 である。